From 3143e95353072523ff5d9c977a474a87fe3cbe57 Mon Sep 17 00:00:00 2001
From: Tobias Droste <tdroste@gmx.de>
Date: Sat, 16 Jul 2011 19:40:47 +0200
Subject: [PATCH 001/600] llvmpipe: fix build with LLVM 3.0svn

LLVM 3.0svn introduced a new type system. It defines a new way to create
named structs and removes the (now not needed) LLVMInvalidateStructLayout
function.  See revision 134829 of LLVM.

Signed-off-by: Tobias Droste <tdroste@gmx.de>
Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/draw/draw_llvm.c | 48 +++++++++++++++++++-------
 src/gallium/drivers/llvmpipe/lp_jit.c  | 19 +++++++---
 2 files changed, 50 insertions(+), 17 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 8bb87440497..996e295e4b5 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -96,7 +96,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var);
  * Create LLVM type for struct draw_jit_texture
  */
 static LLVMTypeRef
-create_jit_texture_type(struct gallivm_state *gallivm)
+create_jit_texture_type(struct gallivm_state *gallivm, const char *struct_name)
 {
    LLVMTargetDataRef target = gallivm->target;
    LLVMTypeRef texture_type;
@@ -120,13 +120,21 @@ create_jit_texture_type(struct gallivm_state *gallivm)
    elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] = 
       LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
 
+#if HAVE_LLVM >= 0x0300
+   texture_type = LLVMStructCreateNamed(gallivm->context, struct_name);
+   LLVMStructSetBody(texture_type, elem_types,
+                     Elements(elem_types), 0);
+#else
    texture_type = LLVMStructTypeInContext(gallivm->context, elem_types,
                                           Elements(elem_types), 0);
 
+   LLVMAddTypeName(gallivm->module, struct_name, texture_type);
+
    /* Make sure the target's struct layout cache doesn't return
     * stale/invalid data.
     */
    LLVMInvalidateStructLayout(gallivm->target, texture_type);
+#endif
 
    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
                           target, texture_type,
@@ -176,7 +184,7 @@ create_jit_texture_type(struct gallivm_state *gallivm)
  */
 static LLVMTypeRef
 create_jit_context_type(struct gallivm_state *gallivm,
-                        LLVMTypeRef texture_type)
+                        LLVMTypeRef texture_type, const char *struct_name)
 {
    LLVMTargetDataRef target = gallivm->target;
    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
@@ -189,11 +197,17 @@ create_jit_context_type(struct gallivm_state *gallivm,
    elem_types[3] = LLVMPointerType(float_type, 0); /* viewport */
    elem_types[4] = LLVMArrayType(texture_type,
                                  PIPE_MAX_VERTEX_SAMPLERS); /* textures */
-
+#if HAVE_LLVM >= 0x0300
+   context_type = LLVMStructCreateNamed(gallivm->context, struct_name);
+   LLVMStructSetBody(context_type, elem_types,
+                     Elements(elem_types), 0);
+#else
    context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
                                           Elements(elem_types), 0);
+   LLVMAddTypeName(gallivm->module, struct_name, context_type);
 
    LLVMInvalidateStructLayout(gallivm->target, context_type);
+#endif
 
    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
                           target, context_type, 0);
@@ -215,7 +229,7 @@ create_jit_context_type(struct gallivm_state *gallivm,
  * Create LLVM type for struct pipe_vertex_buffer
  */
 static LLVMTypeRef
-create_jit_vertex_buffer_type(struct gallivm_state *gallivm)
+create_jit_vertex_buffer_type(struct gallivm_state *gallivm, const char *struct_name)
 {
    LLVMTargetDataRef target = gallivm->target;
    LLVMTypeRef elem_types[3];
@@ -225,10 +239,17 @@ create_jit_vertex_buffer_type(struct gallivm_state *gallivm)
    elem_types[1] = LLVMInt32TypeInContext(gallivm->context);
    elem_types[2] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* vs_constants */
 
+#if HAVE_LLVM >= 0x0300
+   vb_type = LLVMStructCreateNamed(gallivm->context, struct_name);
+   LLVMStructSetBody(vb_type, elem_types,
+                     Elements(elem_types), 0);
+#else
    vb_type = LLVMStructTypeInContext(gallivm->context, elem_types,
                                      Elements(elem_types), 0);
+   LLVMAddTypeName(gallivm->module, struct_name, vb_type);
 
    LLVMInvalidateStructLayout(gallivm->target, vb_type);
+#endif
 
    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
                           target, vb_type, 0);
@@ -258,10 +279,17 @@ create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems)
    elem_types[1]  = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
    elem_types[2]  = LLVMArrayType(elem_types[1], data_elems);
 
+#if HAVE_LLVM >= 0x0300
+   vertex_header = LLVMStructCreateNamed(gallivm->context, struct_name);
+   LLVMStructSetBody(vertex_header, elem_types,
+                     Elements(elem_types), 0);
+#else
    vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types,
                                            Elements(elem_types), 0);
+   LLVMAddTypeName(gallivm->module, struct_name, vertex_header);
 
    LLVMInvalidateStructLayout(gallivm->target, vertex_header);
+#endif
 
    /* these are bit-fields and we can't take address of them
       LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
@@ -284,8 +312,6 @@ create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems)
                           target, vertex_header,
                           DRAW_JIT_VERTEX_DATA);
 
-   LLVMAddTypeName(gallivm->module, struct_name, vertex_header);
-
    return vertex_header;
 }
 
@@ -299,19 +325,15 @@ create_jit_types(struct draw_llvm *llvm)
    struct gallivm_state *gallivm = llvm->gallivm;
    LLVMTypeRef texture_type, context_type, buffer_type, vb_type;
 
-   texture_type = create_jit_texture_type(gallivm);
-   LLVMAddTypeName(gallivm->module, "texture", texture_type);
+   texture_type = create_jit_texture_type(gallivm, "texture");
 
-   context_type = create_jit_context_type(gallivm, texture_type);
-   LLVMAddTypeName(gallivm->module, "draw_jit_context", context_type);
+   context_type = create_jit_context_type(gallivm, texture_type, "draw_jit_context");
    llvm->context_ptr_type = LLVMPointerType(context_type, 0);
 
    buffer_type = LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0);
-   LLVMAddTypeName(gallivm->module, "buffer", buffer_type);
    llvm->buffer_ptr_type = LLVMPointerType(buffer_type, 0);
 
-   vb_type = create_jit_vertex_buffer_type(gallivm);
-   LLVMAddTypeName(gallivm->module, "pipe_vertex_buffer", vb_type);
+   vb_type = create_jit_vertex_buffer_type(gallivm, "pipe_vertex_buffer");
    llvm->vb_ptr_type = LLVMPointerType(vb_type, 0);
 }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index 268f0fa034b..ce92a80721a 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -68,10 +68,17 @@ lp_jit_create_types(struct llvmpipe_context *lp)
       elem_types[LP_JIT_TEXTURE_BORDER_COLOR] = 
          LLVMArrayType(LLVMFloatTypeInContext(lc), 4);
 
+#if HAVE_LLVM >= 0x0300
+   texture_type = LLVMStructCreateNamed(gallivm->context, "texture");
+   LLVMStructSetBody(texture_type, elem_types,
+                     Elements(elem_types), 0);
+#else
       texture_type = LLVMStructTypeInContext(lc, elem_types,
                                              Elements(elem_types), 0);
+      LLVMAddTypeName(gallivm->module, "texture", texture_type);
 
       LLVMInvalidateStructLayout(gallivm->target, texture_type);
+#endif
 
       LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width,
                              gallivm->target, texture_type,
@@ -112,8 +119,6 @@ lp_jit_create_types(struct llvmpipe_context *lp)
 
       LP_CHECK_STRUCT_SIZE(struct lp_jit_texture,
                            gallivm->target, texture_type);
-
-      LLVMAddTypeName(gallivm->module, "texture", texture_type);
    }
 
    /* struct lp_jit_context */
@@ -129,11 +134,19 @@ lp_jit_create_types(struct llvmpipe_context *lp)
       elem_types[LP_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type,
                                                       PIPE_MAX_SAMPLERS);
 
+#if HAVE_LLVM >= 0x0300
+   context_type = LLVMStructCreateNamed(gallivm->context, "context");
+   LLVMStructSetBody(context_type, elem_types,
+                     Elements(elem_types), 0);
+#else
       context_type = LLVMStructTypeInContext(lc, elem_types,
                                              Elements(elem_types), 0);
 
       LLVMInvalidateStructLayout(gallivm->target, context_type);
 
+      LLVMAddTypeName(gallivm->module, "context", context_type);
+#endif
+
       LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants,
                              gallivm->target, context_type,
                              LP_JIT_CTX_CONSTANTS);
@@ -155,8 +168,6 @@ lp_jit_create_types(struct llvmpipe_context *lp)
       LP_CHECK_STRUCT_SIZE(struct lp_jit_context,
                            gallivm->target, context_type);
 
-      LLVMAddTypeName(gallivm->module, "context", context_type);
-
       lp->jit_context_ptr_type = LLVMPointerType(context_type, 0);
    }
 

From b38c26f19f529a0d0b90524cab944cf2c8a3f560 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 19 Jul 2011 08:42:46 -0600
Subject: [PATCH 002/600] llvmpipe: include LLVM version number in name string

---
 src/gallium/drivers/llvmpipe/lp_screen.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 4b2ae1436ea..e3f8c19679f 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -93,7 +93,9 @@ llvmpipe_get_vendor(struct pipe_screen *screen)
 static const char *
 llvmpipe_get_name(struct pipe_screen *screen)
 {
-   return "llvmpipe";
+   static char buf[100];
+   snprintf(buf, sizeof(buf), "llvmpipe (LLVM 0x%x)", HAVE_LLVM);
+   return buf;
 }
 
 

From fb5ff51f422e1718c09da01f3c5bb5baecc9d68e Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 19 Jul 2011 12:20:14 -0700
Subject: [PATCH 003/600] i965: Fix regression in
 29a911c50e4443dfebef0a2e32c39b64992fa3cc.

The previous define was the full 32-bit header, while the new define
was just the top 16 bits.
---
 src/mesa/drivers/dri/i965/brw_misc_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 03cebbb824b..f7e6e7c81d1 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -46,7 +46,7 @@ static void upload_drawing_rect(struct brw_context *brw)
    struct gl_context *ctx = &intel->ctx;
 
    BEGIN_BATCH(4);
-   OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE);
+   OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
    OUT_BATCH(0); /* xmin, ymin */
    OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
 	    ((ctx->DrawBuffer->Height - 1) << 16));

From f7dbcba280e4397cadb14f230aa925b4143cdde4 Mon Sep 17 00:00:00 2001
From: Chad Versace <chad@chad-versace.us>
Date: Mon, 18 Jul 2011 00:37:45 -0700
Subject: [PATCH 004/600] intel: Fix stencil buffer to be W tiled

Until now, the stencil buffer was allocated as a Y tiled buffer, because
in several locations the PRM states that it is. However, it is actually
W tiled. From the PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section
4.5.2.1 W-Major Format:
    W-Major Tile Format is used for separate stencil.

The GTT is incapable of W fencing, so we allocate the stencil buffer with
I915_TILING_NONE and decode the tile's layout in software.

This fix touches the following portions of code:
    - In intel_allocate_renderbuffer_storage(), allocate the stencil
      buffer with I915_TILING_NONE.
    - In intel_verify_dri2_has_hiz(), verify that the stencil buffer is
      not tiled.
    - In the stencil buffer's span functions, the tile's layout must be
      decoded in software.

This commit mutually depends on the xf86-video-intel commit
    dri: Do not tile stencil buffer
    Author: Chad Versace <chad@chad-versace.us>
    Date:   Mon Jul 18 00:38:00 2011 -0700

On Gen6 with separate stencil enabled, fixes the following Piglit tests:
    bugs/fdo23670-drawpix_stencil
    general/stencil-drawpixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX16-copypixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX16-drawpixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX16-readpixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX1-copypixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX1-drawpixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX1-readpixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX4-copypixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX4-drawpixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX4-readpixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX8-copypixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX8-drawpixels
    spec/EXT_framebuffer_object/fbo-stencil-GL_STENCIL_INDEX8-readpixels
    spec/EXT_packed_depth_stencil/fbo-stencil-GL_DEPTH24_STENCIL8-copypixels
    spec/EXT_packed_depth_stencil/fbo-stencil-GL_DEPTH24_STENCIL8-readpixels
    spec/EXT_packed_depth_stencil/readpixels-24_8

Note: This is a candidate for the 7.11 branch.

Signed-off-by: Chad Versace <chad@chad-versace.us>
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/intel/intel_clear.c   |  6 ++
 src/mesa/drivers/dri/intel/intel_context.c |  9 ++-
 src/mesa/drivers/dri/intel/intel_fbo.c     | 12 +--
 src/mesa/drivers/dri/intel/intel_screen.h  |  9 ++-
 src/mesa/drivers/dri/intel/intel_span.c    | 88 ++++++++++++++++------
 5 files changed, 93 insertions(+), 31 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c
index dfca03c14bf..5ab98734cfc 100644
--- a/src/mesa/drivers/dri/intel/intel_clear.c
+++ b/src/mesa/drivers/dri/intel/intel_clear.c
@@ -143,6 +143,12 @@ intelClear(struct gl_context *ctx, GLbitfield mask)
 	     */
             tri_mask |= BUFFER_BIT_STENCIL;
          }
+	 else if (intel->has_separate_stencil &&
+	       stencilRegion->tiling == I915_TILING_NONE) {
+	    /* The stencil buffer is actually W tiled, which the hardware
+	     * cannot blit to. */
+	    tri_mask |= BUFFER_BIT_STENCIL;
+	 }
          else {
             /* clearing all stencil bits, use blitting */
             blit_mask |= BUFFER_BIT_STENCIL;
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 2ba13632569..fe8be082dfc 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -1439,7 +1439,12 @@ intel_verify_dri2_has_hiz(struct intel_context *intel,
       assert(stencil_rb->Base.Format == MESA_FORMAT_S8);
       assert(depth_rb && depth_rb->Base.Format == MESA_FORMAT_X8_Z24);
 
-      if (stencil_rb->region->tiling == I915_TILING_Y) {
+      if (stencil_rb->region->tiling == I915_TILING_NONE) {
+	 /*
+	  * The stencil buffer is actually W tiled. The region's tiling is
+	  * I915_TILING_NONE, however, because the GTT is incapable of W
+	  * fencing.
+	  */
 	 intel->intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_TRUE;
 	 return;
       } else {
@@ -1527,7 +1532,7 @@ intel_verify_dri2_has_hiz(struct intel_context *intel,
        * Presently, however, no verification or clean up is necessary, and
        * execution should not reach here. If the framebuffer still has a hiz
        * region, then we have already set dri2_has_hiz to true after
-       * confirming above that the stencil buffer is Y tiled.
+       * confirming above that the stencil buffer is W tiled.
        */
       assert(0);
    }
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 55bcc757873..35be3257ab3 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -173,6 +173,9 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer
 
    if (irb->Base.Format == MESA_FORMAT_S8) {
       /*
+       * The stencil buffer is W tiled. However, we request from the kernel a
+       * non-tiled buffer because the GTT is incapable of W fencing.
+       *
        * The stencil buffer has quirky pitch requirements.  From Vol 2a,
        * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
        *    The pitch must be set to 2x the value computed based on width, as
@@ -180,14 +183,13 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer
        * To accomplish this, we resort to the nasty hack of doubling the drm
        * region's cpp and halving its height.
        *
-       * If we neglect to double the pitch, then drm_intel_gem_bo_map_gtt()
-       * maps the memory incorrectly.
+       * If we neglect to double the pitch, then render corruption occurs.
        */
       irb->region = intel_region_alloc(intel->intelScreen,
-				       I915_TILING_Y,
+				       I915_TILING_NONE,
 				       cpp * 2,
-				       width,
-				       height / 2,
+				       ALIGN(width, 64),
+				       ALIGN((height + 1) / 2, 64),
 				       GL_TRUE);
       if (!irb->region)
 	return false;
diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h
index b2013af1a29..9dd6a525566 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.h
+++ b/src/mesa/drivers/dri/intel/intel_screen.h
@@ -63,9 +63,12 @@
  * x8_z24 and s8).
  *
  * Eventually, intel_update_renderbuffers() makes a DRI2 request for
- * DRI2BufferStencil and DRI2BufferHiz. If the returned buffers are Y tiled,
- * then we joyfully set intel_screen.dri2_has_hiz to true and continue as if
- * nothing happend.
+ * DRI2BufferStencil and DRI2BufferHiz. If the stencil buffer's tiling is
+ * I915_TILING_NONE [1], then we joyfully set intel_screen.dri2_has_hiz to
+ * true and continue as if nothing happend.
+ *
+ * [1] The stencil buffer is actually W tiled. However, we request from the
+ *     kernel a non-tiled buffer because the GTT is incapable of W fencing.
  *
  * If the buffers are X tiled, however, the handshake has failed and we must
  * clean up.
diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c
index 153803fba09..2e1c80c4766 100644
--- a/src/mesa/drivers/dri/intel/intel_span.c
+++ b/src/mesa/drivers/dri/intel/intel_span.c
@@ -131,38 +131,84 @@ intel_set_span_functions(struct intel_context *intel,
    int miny = 0;							\
    int maxx = rb->Width;						\
    int maxy = rb->Height;						\
-   int stride = rb->RowStride;						\
-   uint8_t *buf = rb->Data;						\
+									\
+   /*									\
+    * Here we ignore rb->Data and rb->RowStride as set by		\
+    * intelSpanRenderStart. Since intel_offset_S8 decodes the W tile	\
+    * manually, the region's *real* base address and stride is		\
+    * required.								\
+    */									\
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);		\
+   uint8_t *buf = irb->region->buffer->virtual;				\
+   unsigned stride = irb->region->pitch;				\
+   unsigned height = 2 * irb->region->height;				\
+   bool flip = rb->Name == 0;						\
+   int y_scale = flip ? -1 : 1;						\
+   int y_bias = flip ? (height - 1) : 0;				\
 
-/* Don't flip y. */
 #undef Y_FLIP
-#define Y_FLIP(y) y
+#define Y_FLIP(y) (y_scale * (y) + y_bias)
 
 /**
  * \brief Get pointer offset into stencil buffer.
  *
- * The stencil buffer interleaves two rows into one. Yay for crazy hardware.
- * The table below demonstrates how the pointer arithmetic behaves for a buffer
- * with positive stride (s=stride).
- *
- *     x    | y     | byte offset
- *     --------------------------
- *     0    | 0     | 0
- *     0    | 1     | 1
- *     1    | 0     | 2
- *     1    | 1     | 3
- *     ...  | ...   | ...
- *     0    | 2     | s
- *     0    | 3     | s + 1
- *     1    | 2     | s + 2
- *     1    | 3     | s + 3
+ * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
+ * must decode the tile's layout in software.
  *
+ * See
+ *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
+ *     Format.
+ *   - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
  *
+ * Even though the returned offset is always positive, the return type is
+ * signed due to
+ *    commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
+ *    mesa: Fix return type of  _mesa_get_format_bytes() (#37351)
  */
 static inline intptr_t
-intel_offset_S8(int stride, GLint x, GLint y)
+intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y)
 {
-   return 2 * ((y / 2) * stride + x) + y % 2;
+   uint32_t tile_size = 4096;
+   uint32_t tile_width = 64;
+   uint32_t tile_height = 64;
+   uint32_t row_size = 64 * stride;
+
+   uint32_t tile_x = x / tile_width;
+   uint32_t tile_y = y / tile_height;
+
+   /* The byte's address relative to the tile's base addres. */
+   uint32_t byte_x = x % tile_width;
+   uint32_t byte_y = y % tile_height;
+
+   uintptr_t u = tile_y * row_size
+               + tile_x * tile_size
+               + 512 * (byte_x / 8)
+               +  64 * (byte_y / 8)
+               +  32 * ((byte_y / 4) % 2)
+               +  16 * ((byte_x / 4) % 2)
+               +   8 * ((byte_y / 2) % 2)
+               +   4 * ((byte_x / 2) % 2)
+               +   2 * (byte_y % 2)
+               +   1 * (byte_x % 2);
+
+   /*
+    * Errata for Gen5:
+    *
+    * An additional offset is needed which is not documented in the PRM.
+    *
+    * if ((byte_x / 8) % 2 == 1) {
+    *    if ((byte_y / 8) % 2) == 0) {
+    *       u += 64;
+    *    } else {
+    *       u -= 64;
+    *    }
+    * }
+    *
+    * The offset is expressed more tersely as
+    * u += ((int) x & 0x8) * (8 - (((int) y & 0x8) << 1));
+    */
+
+   return u;
 }
 
 #define WRITE_STENCIL(x, y, src)  buf[intel_offset_S8(stride, x, y)] = src;

From 0f20e2e18f902b4319851643e1775a18c2aacb3d Mon Sep 17 00:00:00 2001
From: Henri Verbeet <hverbeet@gmail.com>
Date: Mon, 18 Jul 2011 00:42:27 +0200
Subject: [PATCH 005/600] glx: Avoid calling __glXInitialize() in
 driReleaseDrawables().

This fixes a regression introduced by commit
a26121f37530619610a78a5fbe5ef87e44047fda (fd.o bug #39219).

Since the __glXInitialize() call should be unnecessary anyway, this is
probably a nicer fix for the original problem too.

NOTE: This is a candidate for the 7.10 and 7.11 branches.

Signed-off-by: Henri Verbeet <hverbeet@gmail.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Tested-by: padfoot@exemail.com.au
---
 src/glx/dri_common.c |  2 +-
 src/glx/glxext.c     | 13 ++++---------
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/src/glx/dri_common.c b/src/glx/dri_common.c
index bac0c9e5911..e7dba5a68de 100644
--- a/src/glx/dri_common.c
+++ b/src/glx/dri_common.c
@@ -388,7 +388,7 @@ driFetchDrawable(struct glx_context *gc, GLXDrawable glxDrawable)
 _X_HIDDEN void
 driReleaseDrawables(struct glx_context *gc)
 {
-   struct glx_display *const priv = __glXInitialize(gc->psc->dpy);
+   const struct glx_display *priv = gc->psc->display;
    __GLXDRIdrawable *pdraw;
 
    if (priv == NULL)
diff --git a/src/glx/glxext.c b/src/glx/glxext.c
index 8704c484f96..8254544d1c0 100644
--- a/src/glx/glxext.c
+++ b/src/glx/glxext.c
@@ -260,25 +260,20 @@ glx_display_free(struct glx_display *priv)
 static int
 __glXCloseDisplay(Display * dpy, XExtCodes * codes)
 {
-   struct glx_display *priv, **prev, *next;
+   struct glx_display *priv, **prev;
 
    _XLockMutex(_Xglobal_lock);
    prev = &glx_displays;
    for (priv = glx_displays; priv; prev = &priv->next, priv = priv->next) {
       if (priv->dpy == dpy) {
+         *prev = priv->next;
 	 break;
       }
    }
-
-   /* Only remove the display from the list after it's destroyed. The cleanup
-    * code (e.g. driReleaseDrawables()) ends up calling __glXInitialize(),
-    * which would create a new glx_display while we're trying to destroy this
-    * one. */
-   next = priv->next;
-   glx_display_free(priv);
-   *prev = next;
    _XUnlockMutex(_Xglobal_lock);
 
+   glx_display_free(priv);
+
    return 1;
 }
 

From f0e306c3430e4d8f6c8e085537807007a488f1e2 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 19 Jul 2011 15:24:47 -0600
Subject: [PATCH 006/600] mesa: update, shorten some comments in dd.h

---
 src/mesa/main/dd.h | 51 ++++++++++++----------------------------------
 1 file changed, 13 insertions(+), 38 deletions(-)

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 9fe6d527f92..e1ae30fe4d4 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -189,31 +189,22 @@ struct dd_function_table {
    /*@{*/
 
    /**
-    * Choose texture format.
-    * 
-    * This is called by the \c _mesa_store_tex[sub]image[123]d() fallback
-    * functions.  The driver should examine \p internalFormat and return a
-    * gl_format value.
+    * Choose actual hardware texture format given the user-provided source
+    * image format and type and the desired internal format.  In some
+    * cases, srcFormat and srcType can be GL_NONE.
+    * Called by glTexImage(), etc.
     */
    GLuint (*ChooseTextureFormat)( struct gl_context *ctx, GLint internalFormat,
                                      GLenum srcFormat, GLenum srcType );
 
    /**
-    * Called by glTexImage1D().
-    * 
-    * \param target user specified.
-    * \param format user specified.
-    * \param type user specified.
-    * \param pixels user specified.
-    * \param packing indicates the image packing of pixels.
+    * Called by glTexImage1D().  Simply copy the source texture data into the
+    * destination texture memory.  The gl_texture_image fields, etc. will be
+    * fully initialized.
+    * The parameters are the same as glTexImage1D(), plus:
+    * \param packing describes how to unpack the source data.
     * \param texObj is the target texture object.
-    * \param texImage is the target texture image.  It will have the texture \p
-    * width, \p height, \p depth, \p border and \p internalFormat information.
-    * 
-    * \p retainInternalCopy is returned by this function and indicates whether
-    * core Mesa should keep an internal copy of the texture image.
-    *
-    * Drivers should call a fallback routine from texstore.c if needed.
+    * \param texImage is the target texture image.
     */
    void (*TexImage1D)( struct gl_context *ctx, GLenum target, GLint level,
                        GLint internalFormat,
@@ -250,25 +241,9 @@ struct dd_function_table {
                        struct gl_texture_image *texImage );
 
    /**
-    * Called by glTexSubImage1D().
-    *
-    * \param target user specified.
-    * \param level user specified.
-    * \param xoffset user specified.
-    * \param yoffset user specified.
-    * \param zoffset user specified.
-    * \param width user specified.
-    * \param height user specified.
-    * \param depth user specified.
-    * \param format user specified.
-    * \param type user specified.
-    * \param pixels user specified.
-    * \param packing indicates the image packing of pixels.
-    * \param texObj is the target texture object.
-    * \param texImage is the target texture image.  It will have the texture \p
-    * width, \p height, \p border and \p internalFormat information.
-    *
-    * The driver should use a fallback routine from texstore.c if needed.
+    * Called by glTexSubImage1D().  Replace a subset of the target texture
+    * with new texel data.
+    * \sa dd_function_table::TexImage1D.
     */
    void (*TexSubImage1D)( struct gl_context *ctx, GLenum target, GLint level,
                           GLint xoffset, GLsizei width,

From d84791a72b33f96fab54ff2399e8053c50205454 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= <fredrik@kde.org>
Date: Tue, 19 Jul 2011 15:25:32 -0600
Subject: [PATCH 007/600] st/mesa: fix the texture format in
 st_context_teximage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 1a339b6c71ebab6e1a64f05b2e133022d3bbcd15 made
st_ChooseTextureFormat map GL_RGBA with type GL_UNSIGNED_BYTE
to PIPE_FORMAT_A8B8G8R8_UNORM.

The image format for ARGB pixmaps is PIPE_FORMAT_B8G8R8A8_UNORM
however. This mismatch caused the texture to be recreated in
st_finalize_texture.

NOTE: This is a candidate for the 7.11 branch.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=39209
Signed-off-by: Fredrik Höglund <fredrik@kde.org>
Reviewed-by: Stéphane Marchesin <marcheu@chromium.org>
Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/state_tracker/st_manager.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c
index 7bd82aae206..d5228d387f7 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -587,7 +587,7 @@ st_context_teximage(struct st_context_iface *stctxi,
          internalFormat = GL_RGB;
 
       texFormat = st_ChooseTextureFormat(ctx, internalFormat,
-                                         GL_RGBA, GL_UNSIGNED_BYTE);
+                                         GL_BGRA, GL_UNSIGNED_BYTE);
 
       _mesa_init_teximage_fields(ctx, target, texImage,
                                  tex->width0, tex->height0, 1, 0,

From 496bf3822a724127b2632596dc45648fdeda0afb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=B8rgen=20Lind?= <jorgen.lind@nokia.com>
Date: Tue, 19 Jul 2011 22:52:20 +0200
Subject: [PATCH 008/600] Make it possible to use gbm with c++

NOTE: This is a candiate for 7.11
---
 src/gbm/main/gbm.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/gbm/main/gbm.h b/src/gbm/main/gbm.h
index d79a03e4b3f..05d2292dc75 100644
--- a/src/gbm/main/gbm.h
+++ b/src/gbm/main/gbm.h
@@ -28,6 +28,11 @@
 #ifndef _GBM_H_
 #define _GBM_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
 #define __GBM__ 1
 
 #include <stdint.h>
@@ -97,4 +102,8 @@ gbm_bo_get_handle(struct gbm_bo *bo);
 void
 gbm_bo_destroy(struct gbm_bo *bo);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif

From 5874890c26f434f54e9218b83fae4eb8175c24e9 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 19 Jul 2011 20:03:05 -0600
Subject: [PATCH 009/600] mesa: stop using ctx->Driver.CopyTexImage1D/2D()
 hooks

---
 src/mesa/main/teximage.c | 51 ++++++++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 18 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 6f53686e7ff..302fd65cb27 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -2797,29 +2797,43 @@ copyteximage(struct gl_context *ctx, GLuint dims,
 	 _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims);
       }
       else {
-         gl_format texFormat;
-
-         if (texImage->Data) {
-            ctx->Driver.FreeTexImageData( ctx, texImage );
-         }
-
-         ASSERT(texImage->Data == NULL);
-
-         texFormat = _mesa_choose_texture_format(ctx, texObj, target, level,
-                                                 internalFormat, GL_NONE,
-                                                 GL_NONE);
+         /* choose actual hw format */
+         gl_format texFormat = _mesa_choose_texture_format(ctx, texObj,
+                                                           target, level,
+                                                           internalFormat,
+                                                           GL_NONE, GL_NONE);
 
          if (legal_texture_size(ctx, texFormat, width, height, 1)) {
+            GLint srcX = x, srcY = y, dstX = 0, dstY = 0;
+
+            /* Free old texture image */
+            ctx->Driver.FreeTexImageData(ctx, texImage);
+
             _mesa_init_teximage_fields(ctx, target, texImage, width, height, 1,
                                        border, internalFormat, texFormat);
 
-            ASSERT(ctx->Driver.CopyTexImage2D);
-            if (dims == 1)
-               ctx->Driver.CopyTexImage1D(ctx, target, level, internalFormat,
-                                          x, y, width, border);
-            else
-               ctx->Driver.CopyTexImage2D(ctx, target, level, internalFormat,
-                                          x, y, width, height, border);
+            /* Allocate texture memory (no pixel data yet) */
+            if (dims == 1) {
+               ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
+                                      width, border, GL_NONE, GL_NONE, NULL,
+                                      &ctx->Unpack, texObj, texImage);
+            }
+            else {
+               ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
+                                      width, height, border, GL_NONE, GL_NONE,
+                                      NULL, &ctx->Unpack, texObj, texImage);
+            }
+
+            if (_mesa_clip_copytexsubimage(ctx, &dstX, &dstY, &srcX, &srcY,
+                                           &width, &height)) {
+               if (dims == 1)
+                  ctx->Driver.CopyTexSubImage1D(ctx, target, level, dstX,
+                                                srcX, srcY, width);
+                                                
+               else
+                  ctx->Driver.CopyTexSubImage2D(ctx, target, level, dstX, dstY,
+                                                srcX, srcY, width, height);
+            }
 
             check_gen_mipmap(ctx, target, texObj, level);
 
@@ -2830,6 +2844,7 @@ copyteximage(struct gl_context *ctx, GLuint dims,
             ctx->NewState |= _NEW_TEXTURE;
          }
          else {
+            /* probably too large of image */
             _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims);
          }
       }

From 1da28fa959e80610ebc9b7a28bfb83e3cad3aee4 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 19 Jul 2011 20:03:05 -0600
Subject: [PATCH 010/600] mesa: remove comments referring to
 Driver.TexImage1D/2D

---
 src/mesa/main/texstore.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index 6e1e63bdfb0..c4aeaa8f16d 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -4577,8 +4577,7 @@ texture_row_stride(const struct gl_texture_image *texImage)
 
 
 /**
- * This is the software fallback for Driver.TexImage1D()
- * and Driver.CopyTexImage1D().
+ * This is the software fallback for Driver.TexImage1D().
  * \sa _mesa_store_teximage2d()
  */
 void
@@ -4629,8 +4628,7 @@ _mesa_store_teximage1d(struct gl_context *ctx, GLenum target, GLint level,
 
 
 /**
- * This is the software fallback for Driver.TexImage2D()
- * and Driver.CopyTexImage2D().
+ * This is the software fallback for Driver.TexImage2D().
  *
  * This function is oriented toward storing images in main memory, rather
  * than VRAM.  Device driver's can easily plug in their own replacement.
@@ -4684,8 +4682,7 @@ _mesa_store_teximage2d(struct gl_context *ctx, GLenum target, GLint level,
 
 
 /**
- * This is the software fallback for Driver.TexImage3D()
- * and Driver.CopyTexImage3D().
+ * This is the software fallback for Driver.TexImage3D().
  * \sa _mesa_store_teximage2d()
  */
 void

From fbe6836043dff2798571b838096ed59c60ec4438 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 19 Jul 2011 20:03:05 -0600
Subject: [PATCH 011/600] intel: remove intelCopyTexImage1D/2D()

---
 src/mesa/drivers/dri/intel/intel_tex_copy.c | 97 ---------------------
 1 file changed, 97 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c
index 1a3643da593..e89e91dee3e 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -163,101 +163,6 @@ intel_copy_texsubimage(struct intel_context *intel,
 }
 
 
-static void
-intelCopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level,
-                    GLenum internalFormat,
-                    GLint x, GLint y, GLsizei width, GLint border)
-{
-   struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
-   struct gl_texture_object *texObj =
-      _mesa_select_tex_object(ctx, texUnit, target);
-   struct gl_texture_image *texImage =
-      _mesa_select_tex_image(ctx, texObj, target, level);
-   int srcx, srcy, dstx, dsty, height;
-
-   if (border)
-      goto fail;
-
-   /* Setup or redefine the texture object, mipmap tree and texture
-    * image.  Don't populate yet.  
-    */
-   ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
-                          width, border,
-                          GL_RGBA, CHAN_TYPE, NULL,
-                          &ctx->DefaultPacking, texObj, texImage);
-   srcx = x;
-   srcy = y;
-   dstx = 0;
-   dsty = 0;
-   height = 1;
-   if (!_mesa_clip_copytexsubimage(ctx,
-				   &dstx, &dsty,
-				   &srcx, &srcy,
-				   &width, &height))
-      return;
-
-   if (!intel_copy_texsubimage(intel_context(ctx), target,
-                               intel_texture_image(texImage),
-                               internalFormat, 0, 0, x, y, width, height))
-      goto fail;
-
-   return;
-
- fail:
-   fallback_debug("%s - fallback to swrast\n", __FUNCTION__);
-   _mesa_meta_CopyTexImage1D(ctx, target, level, internalFormat, x, y,
-                             width, border);
-}
-
-
-static void
-intelCopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level,
-                    GLenum internalFormat,
-                    GLint x, GLint y, GLsizei width, GLsizei height,
-                    GLint border)
-{
-   struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
-   struct gl_texture_object *texObj =
-      _mesa_select_tex_object(ctx, texUnit, target);
-   struct gl_texture_image *texImage =
-      _mesa_select_tex_image(ctx, texObj, target, level);
-   int srcx, srcy, dstx, dsty;
-
-   if (border)
-      goto fail;
-
-   /* Setup or redefine the texture object, mipmap tree and texture
-    * image.  Don't populate yet.
-    */
-   ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
-                          width, height, border,
-                          GL_RGBA, GL_UNSIGNED_BYTE, NULL,
-                          &ctx->DefaultPacking, texObj, texImage);
-
-   srcx = x;
-   srcy = y;
-   dstx = 0;
-   dsty = 0;
-   if (!_mesa_clip_copytexsubimage(ctx,
-				   &dstx, &dsty,
-				   &srcx, &srcy,
-				   &width, &height))
-      return;
-
-   if (!intel_copy_texsubimage(intel_context(ctx), target,
-                               intel_texture_image(texImage),
-                               internalFormat, 0, 0, x, y, width, height))
-      goto fail;
-
-   return;
-
- fail:
-   fallback_debug("%s - fallback to swrast\n", __FUNCTION__);
-   _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y,
-                             width, height, border);
-}
-
-
 static void
 intelCopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level,
                        GLint xoffset, GLint x, GLint y, GLsizei width)
@@ -312,8 +217,6 @@ intelCopyTexSubImage2D(struct gl_context * ctx, GLenum target, GLint level,
 void
 intelInitTextureCopyImageFuncs(struct dd_function_table *functions)
 {
-   functions->CopyTexImage1D = intelCopyTexImage1D;
-   functions->CopyTexImage2D = intelCopyTexImage2D;
    functions->CopyTexSubImage1D = intelCopyTexSubImage1D;
    functions->CopyTexSubImage2D = intelCopyTexSubImage2D;
 }

From 9ed87c4463cf265b06566d15ba86bf20661c70de Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 19 Jul 2011 20:03:05 -0600
Subject: [PATCH 012/600] radeon: remove radeonCopyTexImage2D()

---
 src/mesa/drivers/dri/r200/r200_tex.c          |  1 -
 src/mesa/drivers/dri/r300/r300_tex.c          |  1 -
 src/mesa/drivers/dri/r600/evergreen_tex.c     |  1 -
 src/mesa/drivers/dri/r600/r600_tex.c          |  1 -
 src/mesa/drivers/dri/radeon/radeon_tex.c      |  1 -
 src/mesa/drivers/dri/radeon/radeon_tex_copy.c | 55 -------------------
 src/mesa/drivers/dri/radeon/radeon_texture.h  |  5 --
 7 files changed, 65 deletions(-)

diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c
index d42e8f12041..91e77f9f7da 100644
--- a/src/mesa/drivers/dri/r200/r200_tex.c
+++ b/src/mesa/drivers/dri/r200/r200_tex.c
@@ -527,7 +527,6 @@ void r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *fu
    functions->CompressedTexSubImage2D	= radeonCompressedTexSubImage2D;
 
    if (radeon->radeonScreen->kernel_mm) {
-      functions->CopyTexImage2D = radeonCopyTexImage2D;
       functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
    }
 
diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
index 590d9afe14a..93d8fe185ef 100644
--- a/src/mesa/drivers/dri/r300/r300_tex.c
+++ b/src/mesa/drivers/dri/r300/r300_tex.c
@@ -379,7 +379,6 @@ void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun
 	functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
 
 	if (radeon->radeonScreen->kernel_mm) {
-		functions->CopyTexImage2D = radeonCopyTexImage2D;
 		functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
 	}
 
diff --git a/src/mesa/drivers/dri/r600/evergreen_tex.c b/src/mesa/drivers/dri/r600/evergreen_tex.c
index 33a5f277683..9784a8484f2 100644
--- a/src/mesa/drivers/dri/r600/evergreen_tex.c
+++ b/src/mesa/drivers/dri/r600/evergreen_tex.c
@@ -1688,7 +1688,6 @@ void evergreenInitTextureFuncs(radeonContextPtr radeon, struct dd_function_table
 	functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
 
 	if (radeon->radeonScreen->kernel_mm) {
-		functions->CopyTexImage2D = radeonCopyTexImage2D;
 		functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
 	}
 
diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c
index eb7ed30c7a3..3efa1d197fa 100644
--- a/src/mesa/drivers/dri/r600/r600_tex.c
+++ b/src/mesa/drivers/dri/r600/r600_tex.c
@@ -470,7 +470,6 @@ void r600InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun
 	functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
 
 	if (radeon->radeonScreen->kernel_mm) {
-		functions->CopyTexImage2D = radeonCopyTexImage2D;
 		functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
 	}
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c
index 25a8ddf7b6a..a0b5506ae76 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tex.c
@@ -455,7 +455,6 @@ void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *
    functions->CompressedTexSubImage2D	= radeonCompressedTexSubImage2D;
 
    if (radeon->radeonScreen->kernel_mm) {
-      functions->CopyTexImage2D = radeonCopyTexImage2D;
       functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
    }
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
index f14dfa25d40..94ff3c4a727 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
@@ -140,61 +140,6 @@ do_copy_texsubimage(struct gl_context *ctx,
                              dstx, dsty, width, height, flip_y);
 }
 
-void
-radeonCopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level,
-                     GLenum internalFormat,
-                     GLint x, GLint y, GLsizei width, GLsizei height,
-                     GLint border)
-{
-    struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
-    struct gl_texture_object *texObj =
-        _mesa_select_tex_object(ctx, texUnit, target);
-    struct gl_texture_image *texImage =
-        _mesa_select_tex_image(ctx, texObj, target, level);
-    int srcx, srcy, dstx, dsty;
-
-    radeonContextPtr radeon = RADEON_CONTEXT(ctx);
-    radeon_prepare_render(radeon);
-
-    if (border)
-        goto fail;
-
-    /* Setup or redefine the texture object, mipmap tree and texture
-     * image.  Don't populate yet.
-     */
-    ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
-                           width, height, border,
-                           GL_RGBA, GL_UNSIGNED_BYTE, NULL,
-                           &ctx->DefaultPacking, texObj, texImage);
-
-    srcx = x;
-    srcy = y;
-    dstx = 0;
-    dsty = 0;
-    if (!_mesa_clip_copytexsubimage(ctx,
-                                    &dstx, &dsty,
-                                    &srcx, &srcy,
-                                    &width, &height)) {
-        return;
-    }
-
-    if (!do_copy_texsubimage(ctx, target, level,
-                             radeon_tex_obj(texObj), (radeon_texture_image *)texImage,
-                             0, 0, x, y, width, height)) {
-        goto fail;
-    }
-
-    return;
-
-fail:
-    radeon_print(RADEON_FALLBACKS, RADEON_NORMAL,
-                 "Falling back to sw for glCopyTexImage2D (internalFormat %s, border %d)\n",
-                 _mesa_lookup_enum_by_nr(internalFormat), border);
-
-    _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y,
-                              width, height, border);
-}
-
 void
 radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level,
                         GLint xoffset, GLint yoffset,
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h
index 538a07fbba8..6fc06d967dd 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.h
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.h
@@ -126,11 +126,6 @@ void radeonGetCompressedTexImage(struct gl_context *ctx, GLenum target, GLint le
 				 struct gl_texture_object *texObj,
 				 struct gl_texture_image *texImage);
 
-void radeonCopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level,
-			GLenum internalFormat,
-			GLint x, GLint y, GLsizei width, GLsizei height,
-			GLint border);
-
 void radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level,
 			GLint xoffset, GLint yoffset,
 			GLint x, GLint y,

From 0823ef84a5c3a6332ea76d0001febf6aaa440dc3 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 19 Jul 2011 20:03:05 -0600
Subject: [PATCH 013/600] st/mesa: remove st_CopyTexImage1D/2D()

---
 src/mesa/state_tracker/st_cb_texture.c | 55 --------------------------
 1 file changed, 55 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 6907cfc03cf..83e83695aae 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1609,59 +1609,6 @@ st_copy_texsubimage(struct gl_context *ctx,
 
 
 
-static void
-st_CopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level,
-                  GLenum internalFormat,
-                  GLint x, GLint y, GLsizei width, GLint border)
-{
-   struct gl_texture_unit *texUnit =
-      &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
-   struct gl_texture_object *texObj =
-      _mesa_select_tex_object(ctx, texUnit, target);
-   struct gl_texture_image *texImage =
-      _mesa_select_tex_image(ctx, texObj, target, level);
-
-   /* Setup or redefine the texture object, texture and texture
-    * image.  Don't populate yet.  
-    */
-   ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
-                          width, border,
-                          GL_RGBA, CHAN_TYPE, NULL,
-                          &ctx->DefaultPacking, texObj, texImage);
-
-   st_copy_texsubimage(ctx, target, level,
-                       0, 0, 0,  /* destX,Y,Z */
-                       x, y, width, 1);  /* src X, Y, size */
-}
-
-
-static void
-st_CopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level,
-                  GLenum internalFormat,
-                  GLint x, GLint y, GLsizei width, GLsizei height,
-                  GLint border)
-{
-   struct gl_texture_unit *texUnit =
-      &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
-   struct gl_texture_object *texObj =
-      _mesa_select_tex_object(ctx, texUnit, target);
-   struct gl_texture_image *texImage =
-      _mesa_select_tex_image(ctx, texObj, target, level);
-
-   /* Setup or redefine the texture object, texture and texture
-    * image.  Don't populate yet.  
-    */
-   ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
-                          width, height, border,
-                          GL_RGBA, CHAN_TYPE, NULL,
-                          &ctx->DefaultPacking, texObj, texImage);
-
-   st_copy_texsubimage(ctx, target, level,
-                       0, 0, 0,  /* destX,Y,Z */
-                       x, y, width, height);  /* src X, Y, size */
-}
-
-
 static void
 st_CopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level,
                      GLint xoffset, GLint x, GLint y, GLsizei width)
@@ -1947,8 +1894,6 @@ st_init_texture_functions(struct dd_function_table *functions)
    functions->CompressedTexSubImage1D = st_CompressedTexSubImage1D;
    functions->CompressedTexSubImage2D = st_CompressedTexSubImage2D;
    functions->CompressedTexSubImage3D = st_CompressedTexSubImage3D;
-   functions->CopyTexImage1D = st_CopyTexImage1D;
-   functions->CopyTexImage2D = st_CopyTexImage2D;
    functions->CopyTexSubImage1D = st_CopyTexSubImage1D;
    functions->CopyTexSubImage2D = st_CopyTexSubImage2D;
    functions->CopyTexSubImage3D = st_CopyTexSubImage3D;

From 774311fb5403e3da7ff0197199ffad8f34089e6a Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 19 Jul 2011 20:03:05 -0600
Subject: [PATCH 014/600] meta: remove _mesa_meta_CopyTexImage1D/2D()

---
 src/mesa/drivers/common/driverfuncs.c |   2 -
 src/mesa/drivers/common/meta.c        | 113 --------------------------
 src/mesa/drivers/common/meta.h        |  10 ---
 3 files changed, 125 deletions(-)

diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c
index 8ab129dd73d..76630264bf7 100644
--- a/src/mesa/drivers/common/driverfuncs.c
+++ b/src/mesa/drivers/common/driverfuncs.c
@@ -95,8 +95,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver)
    driver->TexSubImage2D = _mesa_store_texsubimage2d;
    driver->TexSubImage3D = _mesa_store_texsubimage3d;
    driver->GetTexImage = _mesa_get_teximage;
-   driver->CopyTexImage1D = _mesa_meta_CopyTexImage1D;
-   driver->CopyTexImage2D = _mesa_meta_CopyTexImage2D;
    driver->CopyTexSubImage1D = _mesa_meta_CopyTexSubImage1D;
    driver->CopyTexSubImage2D = _mesa_meta_CopyTexSubImage2D;
    driver->CopyTexSubImage3D = _mesa_meta_CopyTexSubImage3D;
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 0e58aeca3f5..706239c7736 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -2677,119 +2677,6 @@ get_temp_image_type(struct gl_context *ctx, GLenum baseFormat)
 }
 
 
-/**
- * Helper for _mesa_meta_CopyTexImage1/2D() functions.
- * Have to be careful with locking and meta state for pixel transfer.
- */
-static void
-copy_tex_image(struct gl_context *ctx, GLuint dims, GLenum target, GLint level,
-               GLenum internalFormat, GLint x, GLint y,
-               GLsizei width, GLsizei height, GLint border)
-{
-   struct gl_texture_object *texObj;
-   struct gl_texture_image *texImage;
-   GLenum format, type;
-   GLint bpp;
-   void *buf;
-   struct gl_renderbuffer *read_rb = ctx->ReadBuffer->_ColorReadBuffer;
-
-   texObj = _mesa_get_current_tex_object(ctx, target);
-   texImage = _mesa_get_tex_image(ctx, texObj, target, level);
-
-   /* Choose format/type for temporary image buffer */
-   format = _mesa_base_tex_format(ctx, internalFormat);
-
-   if (format == GL_LUMINANCE &&
-       _mesa_get_format_base_format(read_rb->Format) != GL_LUMINANCE) {
-      /* The glReadPixels() path will convert RGB to luminance by
-       * summing R+G+B.  glCopyTexImage() is supposed to behave as
-       * glCopyPixels, which doesn't do that change, and instead
-       * leaves it up to glTexImage which converts RGB to luminance by
-       * just taking the R channel.  To avoid glReadPixels() trashing
-       * our data, use RGBA for our temporary image.
-       */
-      format = GL_RGBA;
-   }
-
-   type = get_temp_image_type(ctx, format);
-   bpp = _mesa_bytes_per_pixel(format, type);
-   if (bpp <= 0) {
-      _mesa_problem(ctx, "Bad bpp in meta copy_tex_image()");
-      return;
-   }
-
-   /*
-    * Alloc image buffer (XXX could use a PBO)
-    */
-   buf = malloc(width * height * bpp);
-   if (!buf) {
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims);
-      return;
-   }
-
-   _mesa_unlock_texture(ctx, texObj); /* need to unlock first */
-
-   /*
-    * Read image from framebuffer (disable pixel transfer ops)
-    */
-   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
-   ctx->Driver.ReadPixels(ctx, x, y, width, height,
-			  format, type, &ctx->Pack, buf);
-   _mesa_meta_end(ctx);
-
-   if (texImage->Data) {
-      ctx->Driver.FreeTexImageData(ctx, texImage);
-   }
-
-   /* The texture's format was already chosen in _mesa_CopyTexImage() */
-   ASSERT(texImage->TexFormat != MESA_FORMAT_NONE);
-
-   /*
-    * Store texture data (with pixel transfer ops)
-    */
-   _mesa_meta_begin(ctx, META_PIXEL_STORE);
-
-   _mesa_update_state(ctx); /* to update pixel transfer state */
-
-   if (target == GL_TEXTURE_1D) {
-      ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
-                             width, border, format, type,
-                             buf, &ctx->Unpack, texObj, texImage);
-   }
-   else {
-      ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
-                             width, height, border, format, type,
-                             buf, &ctx->Unpack, texObj, texImage);
-   }
-   _mesa_meta_end(ctx);
-
-   _mesa_lock_texture(ctx, texObj); /* re-lock */
-
-   free(buf);
-}
-
-
-void
-_mesa_meta_CopyTexImage1D(struct gl_context *ctx, GLenum target, GLint level,
-                          GLenum internalFormat, GLint x, GLint y,
-                          GLsizei width, GLint border)
-{
-   copy_tex_image(ctx, 1, target, level, internalFormat, x, y,
-                  width, 1, border);
-}
-
-
-void
-_mesa_meta_CopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level,
-                          GLenum internalFormat, GLint x, GLint y,
-                          GLsizei width, GLsizei height, GLint border)
-{
-   copy_tex_image(ctx, 2, target, level, internalFormat, x, y,
-                  width, height, border);
-}
-
-
-
 /**
  * Helper for _mesa_meta_CopyTexSubImage1/2/3D() functions.
  * Have to be careful with locking and meta state for pixel transfer.
diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
index b0797d3d91a..7190dee768a 100644
--- a/src/mesa/drivers/common/meta.h
+++ b/src/mesa/drivers/common/meta.h
@@ -68,16 +68,6 @@ extern void
 _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
                           struct gl_texture_object *texObj);
 
-extern void
-_mesa_meta_CopyTexImage1D(struct gl_context *ctx, GLenum target, GLint level,
-                          GLenum internalFormat, GLint x, GLint y,
-                          GLsizei width, GLint border);
-
-extern void
-_mesa_meta_CopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level,
-                          GLenum internalFormat, GLint x, GLint y,
-                          GLsizei width, GLsizei height, GLint border);
-
 extern void
 _mesa_meta_CopyTexSubImage1D(struct gl_context *ctx, GLenum target, GLint level,
                              GLint xoffset,

From 1c1fc62e388534b6c0751fc9f8ab34a89e25efd0 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 19 Jul 2011 20:03:05 -0600
Subject: [PATCH 015/600] mesa: remove unused
 dd_function_table::CopyTexImage1D/2D() hooks

---
 src/mesa/main/dd.h | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index e1ae30fe4d4..e0c5844e193 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -289,24 +289,6 @@ struct dd_function_table {
                         struct gl_texture_object *texObj,
                         struct gl_texture_image *texImage );
 
-   /**
-    * Called by glCopyTexImage1D().
-    * 
-    * Drivers should use a fallback routine from texstore.c if needed.
-    */
-   void (*CopyTexImage1D)( struct gl_context *ctx, GLenum target, GLint level,
-                           GLenum internalFormat, GLint x, GLint y,
-                           GLsizei width, GLint border );
-
-   /**
-    * Called by glCopyTexImage2D().
-    * 
-    * Drivers should use a fallback routine from texstore.c if needed.
-    */
-   void (*CopyTexImage2D)( struct gl_context *ctx, GLenum target, GLint level,
-                           GLenum internalFormat, GLint x, GLint y,
-                           GLsizei width, GLsizei height, GLint border );
-
    /**
     * Called by glCopyTexSubImage1D().
     * 

From d5e32397762a3bd55fa69ad6332351512083f9c6 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 19 Jul 2011 20:03:05 -0600
Subject: [PATCH 016/600] st/mesa: get rid of redundant clipping code in
 st_copy_texsubimage()

---
 src/mesa/state_tracker/st_cb_texture.c | 28 --------------------------
 1 file changed, 28 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 83e83695aae..25f08aa4d09 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1466,34 +1466,6 @@ st_copy_texsubimage(struct gl_context *ctx,
       depth/stencil samples per pixel? Need some transfer clarifications. */
    assert(sample_count < 2);
 
-   if (srcX < 0) {
-      width -= -srcX;
-      destX += -srcX;
-      srcX = 0;
-   }
-
-   if (srcY < 0) {
-      height -= -srcY;
-      destY += -srcY;
-      srcY = 0;
-   }
-
-   if (destX < 0) {
-      width -= -destX;
-      srcX += -destX;
-      destX = 0;
-   }
-
-   if (destY < 0) {
-      height -= -destY;
-      srcY += -destY;
-      destY = 0;
-   }
-
-   if (width < 0 || height < 0)
-      return;
-
-
    assert(strb);
    assert(strb->surface);
    assert(stImage->pt);

From 4470ff2ebf56b22421038bc7272ef22c085b839d Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 19 Jul 2011 21:10:25 -0600
Subject: [PATCH 017/600] glsl: silence warning in linker.cpp

---
 src/glsl/linker.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 34b64837a46..5ec08446d16 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1343,7 +1343,7 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
    foreach_list(node, sh->ir) {
       ir_variable *const var = ((ir_instruction *) node)->as_variable();
 
-      if ((var == NULL) || (var->mode != direction))
+      if ((var == NULL) || (var->mode != (unsigned) direction))
 	 continue;
 
       if (var->explicit_location) {

From 8d044047f133ad4e4c4f1f6b4a4a1c4a9fa477f5 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 19 Jul 2011 21:11:53 -0600
Subject: [PATCH 018/600] mesa: remove depend files from tarballs

---
 Makefile | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/Makefile b/Makefile
index b0a2d8065f6..817f3d3706a 100644
--- a/Makefile
+++ b/Makefile
@@ -203,12 +203,6 @@ EXTRA_FILES = \
 IGNORE_FILES = \
 	-x autogen.sh
 
-DEPEND_FILES = \
-	src/mesa/depend		\
-	src/glx/depend		\
-	src/glw/depend		\
-	src/glu/sgi/depend
-
 
 parsers: configure
 	-@touch $(TOP)/configs/current
@@ -231,15 +225,9 @@ AUTOCONF = autoconf
 AC_FLAGS =
 aclocal.m4: configure.ac acinclude.m4
 	$(ACLOCAL) $(ACLOCAL_FLAGS)
-configure: rm_depend configure.ac aclocal.m4 acinclude.m4
+configure: configure.ac aclocal.m4 acinclude.m4
 	$(AUTOCONF) $(AC_FLAGS)
 
-rm_depend:
-	@for dep in $(DEPEND_FILES) ; do \
-		rm -f $$dep ; \
-		touch $$dep ; \
-	done
-
 manifest.txt: .git
 	( \
 		ls -1 $(EXTRA_FILES) ; \
@@ -269,4 +257,4 @@ md5: $(ARCHIVES)
 	@-md5sum $(PACKAGE_NAME).tar.bz2
 	@-md5sum $(PACKAGE_NAME).zip
 
-.PHONY: tarballs rm_depend md5
+.PHONY: tarballs md5

From 3875526926123259521514de9c8d675e3797275a Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 15 Jul 2011 02:27:49 -0700
Subject: [PATCH 019/600] glsl: Avoid massive ralloc_strndup overhead in
 S-Expression parsing.

When parsing S-Expressions, we need to store nul-terminated strings for
Symbol nodes.  Prior to this patch, we called ralloc_strndup each time
we constructed a new s_symbol.  It turns out that this is obscenely
expensive.

Instead, copy the whole buffer before parsing and overwrite it to
contain \0 bytes at the appropriate locations.  Since atoms are
separated by whitespace, (), or ;, we can safely overwrite the character
after a Symbol.  While much of the buffer may be unused, copying the
whole buffer is simple and guaranteed to provide enough space.

Prior to this, running piglit-run.py -t glsl tests/quick.tests with GLSL
1.30 enabled took just over 10 minutes on my machine.  Now it takes 5.

NOTE: This is a candidate for stable release branches (because it will
      make running comparison tests so much less irritating.)

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/s_expression.cpp | 94 +++++++++++++++++++++++++--------------
 src/glsl/s_expression.h   |  2 +-
 2 files changed, 62 insertions(+), 34 deletions(-)

diff --git a/src/glsl/s_expression.cpp b/src/glsl/s_expression.cpp
index a922a50d3b9..e704a3be20d 100644
--- a/src/glsl/s_expression.cpp
+++ b/src/glsl/s_expression.cpp
@@ -25,10 +25,13 @@
 #include <assert.h>
 #include "s_expression.h"
 
-s_symbol::s_symbol(const char *tmp, size_t n)
+s_symbol::s_symbol(const char *str, size_t n)
 {
-   this->str = ralloc_strndup (this, tmp, n);
-   assert(this->str != NULL);
+   /* Assume the given string is already nul-terminated and in memory that
+    * will live as long as this node.
+    */
+   assert(str[n] == '\0');
+   this->str = str;
 }
 
 s_list::s_list()
@@ -36,22 +39,26 @@ s_list::s_list()
 }
 
 static void
-skip_whitespace(const char *& src)
+skip_whitespace(const char *&src, char *&symbol_buffer)
 {
-   src += strspn(src, " \v\t\r\n");
+   size_t n = strspn(src, " \v\t\r\n");
+   src += n;
+   symbol_buffer += n;
    /* Also skip Scheme-style comments: semi-colon 'til end of line */
    if (src[0] == ';') {
-      src += strcspn(src, "\n");
-      skip_whitespace(src);
+      n = strcspn(src, "\n");
+      src += n;
+      symbol_buffer += n;
+      skip_whitespace(src, symbol_buffer);
    }
 }
 
 static s_expression *
-read_atom(void *ctx, const char *& src)
+read_atom(void *ctx, const char *&src, char *&symbol_buffer)
 {
    s_expression *expr = NULL;
 
-   skip_whitespace(src);
+   skip_whitespace(src, symbol_buffer);
 
    size_t n = strcspn(src, "( \v\t\r\n);");
    if (n == 0)
@@ -70,42 +77,63 @@ read_atom(void *ctx, const char *& src)
 	 expr = new(ctx) s_int(i);
    } else {
       // Not a number; return a symbol.
-      expr = new(ctx) s_symbol(src, n);
+      symbol_buffer[n] = '\0';
+      expr = new(ctx) s_symbol(symbol_buffer, n);
    }
 
    src += n;
+   symbol_buffer += n;
 
    return expr;
 }
 
+static s_expression *
+__read_expression(void *ctx, const char *&src, char *&symbol_buffer)
+{
+   s_expression *atom = read_atom(ctx, src, symbol_buffer);
+   if (atom != NULL)
+      return atom;
+
+   skip_whitespace(src, symbol_buffer);
+   if (src[0] == '(') {
+      ++src;
+      ++symbol_buffer;
+
+      s_list *list = new(ctx) s_list;
+      s_expression *expr;
+
+      while ((expr = __read_expression(ctx, src, symbol_buffer)) != NULL) {
+	 list->subexpressions.push_tail(expr);
+      }
+      skip_whitespace(src, symbol_buffer);
+      if (src[0] != ')') {
+	 printf("Unclosed expression (check your parenthesis).\n");
+	 return NULL;
+      }
+      ++src;
+      ++symbol_buffer;
+      return list;
+   }
+   return NULL;
+}
+
 s_expression *
 s_expression::read_expression(void *ctx, const char *&src)
 {
    assert(src != NULL);
 
-   s_expression *atom = read_atom(ctx, src);
-   if (atom != NULL)
-      return atom;
-
-   skip_whitespace(src);
-   if (src[0] == '(') {
-      ++src;
-
-      s_list *list = new(ctx) s_list;
-      s_expression *expr;
-
-      while ((expr = read_expression(ctx, src)) != NULL) {
-	 list->subexpressions.push_tail(expr);
-      }
-      skip_whitespace(src);
-      if (src[0] != ')') {
-	 printf("Unclosed expression (check your parenthesis).\n");
-	 return NULL;
-      }
-      ++src;
-      return list;
-   }
-   return NULL;
+   /* When we encounter a Symbol, we need to save a nul-terminated copy of
+    * the string.  However, ralloc_strndup'ing every individual Symbol is
+    * extremely expensive.  We could avoid this by simply overwriting the
+    * next character (guaranteed to be whitespace, parens, or semicolon) with
+    * a nul-byte.  But overwriting non-whitespace would mess up parsing.
+    *
+    * So, just copy the whole buffer ahead of time.  Walk both, leaving the
+    * original source string unmodified, and altering the copy to contain the
+    * necessary nul-bytes whenever we encounter a symbol.
+    */
+   char *symbol_buffer = ralloc_strdup(ctx, src);
+   return __read_expression(ctx, src, symbol_buffer);
 }
 
 void s_int::print()
diff --git a/src/glsl/s_expression.h b/src/glsl/s_expression.h
index c9dc676b319..642af19b439 100644
--- a/src/glsl/s_expression.h
+++ b/src/glsl/s_expression.h
@@ -129,7 +129,7 @@ public:
    void print();
 
 private:
-   char *str;
+   const char *str;
 };
 
 /* Lists of expressions: (expr1 ... exprN) */

From 8082816e27a0ee376e679c4d81ff8a3f0611ea9e Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Wed, 20 Jul 2011 13:43:24 -0400
Subject: [PATCH 020/600] g3dvl: Init/clean pipe fully when a shader-based
 decoder isn't used.

Fixes VDPAU CSC-only mode.
---
 src/gallium/auxiliary/vl/vl_compositor.c | 25 ++++++++++++++++++++++++
 src/gallium/auxiliary/vl/vl_compositor.h |  1 +
 2 files changed, 26 insertions(+)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 3bd4af2e3e0..faca96dc55b 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -231,6 +231,8 @@ init_pipe_state(struct vl_compositor *c)
    struct pipe_rasterizer_state rast;
    struct pipe_sampler_state sampler;
    struct pipe_blend_state blend;
+   struct pipe_depth_stencil_alpha_state dsa;
+   unsigned i;
 
    assert(c);
 
@@ -289,6 +291,24 @@ init_pipe_state(struct vl_compositor *c)
 
    c->rast = c->pipe->create_rasterizer_state(c->pipe, &rast);
 
+   memset(&dsa, 0, sizeof dsa);
+   dsa.depth.enabled = 0;
+   dsa.depth.writemask = 0;
+   dsa.depth.func = PIPE_FUNC_ALWAYS;
+   for (i = 0; i < 2; ++i) {
+      dsa.stencil[i].enabled = 0;
+      dsa.stencil[i].func = PIPE_FUNC_ALWAYS;
+      dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP;
+      dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP;
+      dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP;
+      dsa.stencil[i].valuemask = 0;
+      dsa.stencil[i].writemask = 0;
+   }
+   dsa.alpha.enabled = 0;
+   dsa.alpha.func = PIPE_FUNC_ALWAYS;
+   dsa.alpha.ref_value = 0;
+   c->dsa = c->pipe->create_depth_stencil_alpha_state(c->pipe, &dsa);
+   c->pipe->bind_depth_stencil_alpha_state(c->pipe, c->dsa);
    return true;
 }
 
@@ -296,6 +316,11 @@ static void cleanup_pipe_state(struct vl_compositor *c)
 {
    assert(c);
 
+   /* Asserted in softpipe_delete_fs_state() for some reason */
+   c->pipe->bind_vs_state(c->pipe, NULL);
+   c->pipe->bind_fs_state(c->pipe, NULL);
+
+   c->pipe->delete_depth_stencil_alpha_state(c->pipe, c->dsa);
    c->pipe->delete_sampler_state(c->pipe, c->sampler_linear);
    c->pipe->delete_sampler_state(c->pipe, c->sampler_nearest);
    c->pipe->delete_blend_state(c->pipe, c->blend);
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 87ad39be1be..0a9a7411a61 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -68,6 +68,7 @@ struct vl_compositor
    void *sampler_nearest;
    void *blend;
    void *rast;
+   void *dsa;
    void *vertex_elems_state;
 
    void *vs;

From b56daf71d2f63d044d4c53ab49c6f87e02991a28 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Wed, 20 Jul 2011 13:49:56 -0400
Subject: [PATCH 021/600] g3dvl: Preserve previously rendered components for MC
 output.

Fixes xvmc-softpipe MC entrypoint, amongst others.
---
 src/gallium/auxiliary/vl/vl_mc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c
index bd05205b52d..add367e3ac6 100644
--- a/src/gallium/auxiliary/vl/vl_mc.c
+++ b/src/gallium/auxiliary/vl/vl_mc.c
@@ -590,7 +590,7 @@ vl_mc_set_surface(struct vl_mc_buffer *buffer, struct pipe_surface *surface)
 }
 
 static void
-prepare_pipe_4_rendering(struct vl_mc_buffer *buffer, unsigned mask)
+prepare_pipe_4_rendering(struct vl_mc_buffer *buffer, unsigned component, unsigned mask)
 {
    struct vl_mc *renderer;
 
@@ -599,7 +599,7 @@ prepare_pipe_4_rendering(struct vl_mc_buffer *buffer, unsigned mask)
    renderer = buffer->renderer;
    renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
 
-   if (buffer->surface_cleared)
+   if (buffer->surface_cleared || component > 0)
       renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_add[mask]);
    else
       renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_clear[mask]);
@@ -615,7 +615,7 @@ vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref)
 
    assert(buffer && ref);
 
-   prepare_pipe_4_rendering(buffer, PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B);
+   prepare_pipe_4_rendering(buffer, 0, PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B);
 
    renderer = buffer->renderer;
 
@@ -643,7 +643,7 @@ vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned component, unsigned num
    if (num_instances == 0)
       return;
 
-   prepare_pipe_4_rendering(buffer, mask);
+   prepare_pipe_4_rendering(buffer, component, mask);
 
    renderer = buffer->renderer;
 

From eee570290aebc8a339acd063033e3daefcef2bc6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 17 Jul 2011 14:53:16 -0700
Subject: [PATCH 022/600] meta: Add a GLSL-based _mesa_meta_Clear() variant.

This cuts out a large portion of the overhead of glClear() from
resetting the texenv state and recomputing the fixed function
programs.  It also means less use of fixed function internally in our
GLES2 drivers, which is rather bogus.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/drivers/common/meta.c | 160 ++++++++++++++++++++++++++++++++-
 src/mesa/drivers/common/meta.h |   3 +
 2 files changed, 162 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 0e58aeca3f5..887118b9417 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -62,6 +62,7 @@
 #include "main/teximage.h"
 #include "main/texparam.h"
 #include "main/texstate.h"
+#include "main/uniforms.h"
 #include "main/varray.h"
 #include "main/viewport.h"
 #include "program/program.h"
@@ -235,6 +236,8 @@ struct clear_state
 {
    GLuint ArrayObj;
    GLuint VBO;
+   GLuint ShaderProg;
+   GLint ColorLocation;
 };
 
 
@@ -1589,10 +1592,165 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers)
    _mesa_meta_end(ctx);
 }
 
+static void
+meta_glsl_clear_init(struct gl_context *ctx, struct clear_state *clear)
+{
+   const char *vs_source =
+      "attribute vec4 position;\n"
+      "void main()\n"
+      "{\n"
+      "   gl_Position = position;\n"
+      "}\n";
+   const char *fs_source =
+      "uniform vec4 color;\n"
+      "void main()\n"
+      "{\n"
+      "   gl_FragColor = color;\n"
+      "}\n";
+   GLuint vs, fs;
+
+   if (clear->ArrayObj != 0)
+      return;
+
+   /* create vertex array object */
+   _mesa_GenVertexArrays(1, &clear->ArrayObj);
+   _mesa_BindVertexArray(clear->ArrayObj);
+
+   /* create vertex array buffer */
+   _mesa_GenBuffersARB(1, &clear->VBO);
+   _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO);
+
+   /* setup vertex arrays */
+   _mesa_VertexAttribPointerARB(0, 3, GL_FLOAT, GL_FALSE, 0, (void *)0);
+   _mesa_EnableVertexAttribArrayARB(0);
+
+   vs = _mesa_CreateShaderObjectARB(GL_VERTEX_SHADER);
+   _mesa_ShaderSourceARB(vs, 1, &vs_source, NULL);
+   _mesa_CompileShaderARB(vs);
+
+   fs = _mesa_CreateShaderObjectARB(GL_FRAGMENT_SHADER);
+   _mesa_ShaderSourceARB(fs, 1, &fs_source, NULL);
+   _mesa_CompileShaderARB(fs);
+
+   clear->ShaderProg = _mesa_CreateProgramObjectARB();
+   _mesa_AttachShader(clear->ShaderProg, fs);
+   _mesa_AttachShader(clear->ShaderProg, vs);
+   _mesa_BindAttribLocationARB(clear->ShaderProg, 0, "position");
+   _mesa_LinkProgramARB(clear->ShaderProg);
+
+   clear->ColorLocation = _mesa_GetUniformLocationARB(clear->ShaderProg,
+						      "color");
+}
+
+/**
+ * Meta implementation of ctx->Driver.Clear() in terms of polygon rendering.
+ */
+void
+_mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers)
+{
+   struct clear_state *clear = &ctx->Meta->Clear;
+   GLbitfield metaSave;
+   const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1;
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   const float x0 = ((float)fb->_Xmin / fb->Width)  * 2.0f - 1.0f;
+   const float y0 = ((float)fb->_Ymin / fb->Height) * 2.0f - 1.0f;
+   const float x1 = ((float)fb->_Xmax / fb->Width)  * 2.0f - 1.0f;
+   const float y1 = ((float)fb->_Ymax / fb->Height) * 2.0f - 1.0f;
+   const float z = -invert_z(ctx->Depth.Clear);
+   struct vertex {
+      GLfloat x, y, z;
+   } verts[4];
+
+   metaSave = (META_ALPHA_TEST |
+	       META_BLEND |
+	       META_DEPTH_TEST |
+	       META_RASTERIZATION |
+	       META_SHADER |
+	       META_STENCIL_TEST |
+	       META_VERTEX |
+	       META_VIEWPORT |
+	       META_CLAMP_FRAGMENT_COLOR);
+
+   if (!(buffers & BUFFER_BITS_COLOR)) {
+      /* We'll use colormask to disable color writes.  Otherwise,
+       * respect color mask
+       */
+      metaSave |= META_COLOR_MASK;
+   }
+
+   _mesa_meta_begin(ctx, metaSave);
+
+   meta_glsl_clear_init(ctx, clear);
+
+   _mesa_UseProgramObjectARB(clear->ShaderProg);
+   _mesa_Uniform4fvARB(clear->ColorLocation, 1,
+		       ctx->Color.ClearColorUnclamped);
+
+   _mesa_BindVertexArray(clear->ArrayObj);
+   _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO);
+
+   /* GL_COLOR_BUFFER_BIT */
+   if (buffers & BUFFER_BITS_COLOR) {
+      /* leave colormask, glDrawBuffer state as-is */
+
+      /* Clears never have the color clamped. */
+      _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE);
+   }
+   else {
+      ASSERT(metaSave & META_COLOR_MASK);
+      _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
+   }
+
+   /* GL_DEPTH_BUFFER_BIT */
+   if (buffers & BUFFER_BIT_DEPTH) {
+      _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_TRUE);
+      _mesa_DepthFunc(GL_ALWAYS);
+      _mesa_DepthMask(GL_TRUE);
+   }
+   else {
+      assert(!ctx->Depth.Test);
+   }
+
+   /* GL_STENCIL_BUFFER_BIT */
+   if (buffers & BUFFER_BIT_STENCIL) {
+      _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_TRUE);
+      _mesa_StencilOpSeparate(GL_FRONT_AND_BACK,
+                              GL_REPLACE, GL_REPLACE, GL_REPLACE);
+      _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS,
+                                ctx->Stencil.Clear & stencilMax,
+                                ctx->Stencil.WriteMask[0]);
+   }
+   else {
+      assert(!ctx->Stencil.Enabled);
+   }
+
+   /* vertex positions */
+   verts[0].x = x0;
+   verts[0].y = y0;
+   verts[0].z = z;
+   verts[1].x = x1;
+   verts[1].y = y0;
+   verts[1].z = z;
+   verts[2].x = x1;
+   verts[2].y = y1;
+   verts[2].z = z;
+   verts[3].x = x0;
+   verts[3].y = y1;
+   verts[3].z = z;
+
+   /* upload new vertex data */
+   _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts), verts,
+		       GL_DYNAMIC_DRAW_ARB);
+
+   /* draw quad */
+   _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+   _mesa_meta_end(ctx);
+}
 
 /**
  * Meta implementation of ctx->Driver.CopyPixels() in terms
- * of texture mapping and polygon rendering.
+ * of texture mapping and polygon rendering and GLSL shaders.
  */
 void
 _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcX, GLint srcY,
diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
index b0797d3d91a..32c789ea638 100644
--- a/src/mesa/drivers/common/meta.h
+++ b/src/mesa/drivers/common/meta.h
@@ -42,6 +42,9 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
 extern void
 _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers);
 
+extern void
+_mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers);
+
 extern void
 _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
                       GLsizei width, GLsizei height,

From 540e66b3bebb5ae82422e386aa178147ea14a39e Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 17 Jul 2011 14:55:10 -0700
Subject: [PATCH 023/600] intel: Use the GLSL-based meta clear when available.

Improves firefox-talos-gfx performance under GL when 3D clears are
enabled:
[  0]       gl-before     firefox-talos-gfx   20.193   20.251   0.27%    3/3
[  0]       gl-after      firefox-talos-gfx   18.013   18.040   0.19%    3/3
---
 src/mesa/drivers/dri/intel/intel_clear.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c
index 5ab98734cfc..81c062fba53 100644
--- a/src/mesa/drivers/dri/intel/intel_clear.c
+++ b/src/mesa/drivers/dri/intel/intel_clear.c
@@ -188,7 +188,10 @@ intelClear(struct gl_context *ctx, GLbitfield mask)
 
    if (tri_mask) {
       debug_mask("tri", tri_mask);
-      _mesa_meta_Clear(&intel->ctx, tri_mask);
+      if (ctx->Extensions.ARB_fragment_shader)
+	 _mesa_meta_glsl_Clear(&intel->ctx, tri_mask);
+      else
+	 _mesa_meta_Clear(&intel->ctx, tri_mask);
    }
 }
 

From dc7422405f6f3c201993251e4665bb9ec1b59db0 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 18 Jul 2011 15:25:10 -0700
Subject: [PATCH 024/600] i965: Avoid kernel BUG_ON if we happen to wait on the
 pipe_control w/a BO.

For this and occlusion queries, we're trying to avoid setting
I915_GEM_DOMAIN_RENDER for the write domain, because the data written
is definitely not going through the render cache, but we do need to
tell the kernel that the object has been written.  However, with using
I915_GEM_DOMAIN_GTT, the kernel on retiring the batchbuffer sees that
the w/a BO has a write domain of GTT, and puts it on the flushing
list.  If something tries to wait for that BO to finish rendering
(such as the AUB dumper reading the contents of BOs), we get into
wait_request (since obj->active) but with a 0 seqno (since the object
is on the flushing list, not actually on a ringbuffer), and BUG_ONs.

To avoid the kernel bug (which I'm hoping to delete soon anyway), just
use I915_GEM_DOMAIN_INSTRUCTION like occlusion queries do.  This
doesn't result in more flushing, because we invalidate INSTRUCTION on
every batchbuffer now that we're state streaming, anyway.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Tested-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/intel/intel_batchbuffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index b61a2ffef19..9c97ef22888 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -325,7 +325,7 @@ intel_emit_post_sync_nonzero_flush(struct intel_context *intel)
    OUT_BATCH(_3DSTATE_PIPE_CONTROL);
    OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
    OUT_RELOC(intel->batch.workaround_bo,
-	     I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT, 0);
+	     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
    OUT_BATCH(0); /* write data */
    ADVANCE_BATCH();
 

From 407785d0e97abd0cc51a6e360089111973748e7c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 18 Jul 2011 17:17:03 -0700
Subject: [PATCH 025/600] i965: Enable the PIPE_CONTROL workaround workaround
 out of paranoia.

There's scary stuff going on in PIPE_CONTROL internals, and if the
BSpec says to do this to make PIPE_CONTROL work, I'll go ahead and do
it because we'll probably never be able to debug it after the fact.

v2: Use stall at scoreboard instead of depth stall, as noted by Ken.
---
 .../drivers/dri/intel/intel_batchbuffer.c     | 31 +++++++++++++++++--
 src/mesa/drivers/dri/intel/intel_reg.h        |  1 +
 2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index 9c97ef22888..97cc219ce6d 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -308,12 +308,29 @@ emit:
  * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
  * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
  *
- * XXX: There is also a workaround that would appear to apply to this
- * workaround, but it doesn't appear to be necessary so far:
+ * And the workaround for these two requires this workaround first:
  *
- * Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
+ * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
  * BEFORE the pipe-control with a post-sync op and no write-cache
  * flushes.
+ *
+ * And this last workaround is tricky because of the requirements on
+ * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
+ * volume 2 part 1:
+ *
+ *     "1 of the following must also be set:
+ *      - Render Target Cache Flush Enable ([12] of DW1)
+ *      - Depth Cache Flush Enable ([0] of DW1)
+ *      - Stall at Pixel Scoreboard ([1] of DW1)
+ *      - Depth Stall ([13] of DW1)
+ *      - Post-Sync Operation ([13] of DW1)
+ *      - Notify Enable ([8] of DW1)"
+ *
+ * The cache flushes require the workaround flush that triggered this
+ * one, so we can't use it.  Depth stall would trigger the same.
+ * Post-sync nonzero is what triggered this second workaround, so we
+ * can't use that one either.  Notify enable is IRQs, which aren't
+ * really our business.  That leaves only stall at scoreboard.
  */
 void
 intel_emit_post_sync_nonzero_flush(struct intel_context *intel)
@@ -321,6 +338,14 @@ intel_emit_post_sync_nonzero_flush(struct intel_context *intel)
    if (!intel->batch.need_workaround_flush)
       return;
 
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+   OUT_BATCH(PIPE_CONTROL_CS_STALL |
+	     PIPE_CONTROL_STALL_AT_SCOREBOARD);
+   OUT_BATCH(0); /* address */
+   OUT_BATCH(0); /* write data */
+   ADVANCE_BATCH();
+
    BEGIN_BATCH(4);
    OUT_BATCH(_3DSTATE_PIPE_CONTROL);
    OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h
index 5aa629150cf..a98a669af21 100644
--- a/src/mesa/drivers/dri/intel/intel_reg.h
+++ b/src/mesa/drivers/dri/intel/intel_reg.h
@@ -75,6 +75,7 @@
 #define PIPE_CONTROL_VF_CACHE_INVALIDATE	(1 << 4)
 #define PIPE_CONTROL_CONST_CACHE_INVALIDATE	(1 << 3)
 #define PIPE_CONTROL_STATE_CACHE_INVALIDATE	(1 << 2)
+#define PIPE_CONTROL_STALL_AT_SCOREBOARD	(1 << 1)
 #define PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1 << 0)
 #define PIPE_CONTROL_PPGTT_WRITE	(0 << 2)
 #define PIPE_CONTROL_GLOBAL_GTT_WRITE	(1 << 2)

From 3e5d36267d8c9536490c902f785137a7fa0637fc Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 19 Jul 2011 15:06:15 -0700
Subject: [PATCH 026/600] i965: Apply a homebrew workaround for GPU hang in
 OGLC api-texcoord.

The behavior of flushes in the hardware is a maze of twisty passages,
and strangely the VS constants appear to be loaded during a pipeline
flush instead of at the time of the packet emit according to the
simulator.  On moving the STATE_BASE_ADDRESS packet to where it really
needed to live (in order for data loads by other packets to be
correct), we sometimes no longer got a flush between those packets
where we apparently needed it.  This replicates the flushes implied by
a STATE_BASE_ADDRESS update, fixing the GPU hangs in OGLC and the
"engine" demo.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=36821
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=39257
Tested-by: Keith Packard <keithp@keithp.com> (bzflag and etracer fixed)
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/gen6_vs_state.c | 26 +++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index fb4cdbaadf9..e70454416bf 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -160,6 +160,32 @@ upload_vs_state(struct brw_context *brw)
 	     GEN6_VS_STATISTICS_ENABLE |
 	     GEN6_VS_ENABLE);
    ADVANCE_BATCH();
+
+   /* Based on my reading of the simulator, the VS constants don't get
+    * pulled into the VS FF unit until an appropriate pipeline flush
+    * happens, and instead the 3DSTATE_CONSTANT_VS packet just adds
+    * references to them into a little FIFO.  The flushes are common,
+    * but don't reliably happen between this and a 3DPRIMITIVE, causing
+    * the primitive to use the wrong constants.  Then the FIFO
+    * containing the constant setup gets added to again on the next
+    * constants change, and eventually when a flush does happen the
+    * unit is overwhelmed by constant changes and dies.
+    *
+    * To avoid this, send a PIPE_CONTROL down the line that will
+    * update the unit immediately loading the constants.  The flush
+    * type bits here were those set by the STATE_BASE_ADDRESS whose
+    * move in a82a43e8d99e1715dd11c9c091b5ab734079b6a6 triggered the
+    * bug reports that led to this workaround, and may be more than
+    * what is strictly required to avoid the issue.
+    */
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL |
+	     PIPE_CONTROL_INSTRUCTION_FLUSH |
+	     PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+   OUT_BATCH(0); /* address */
+   OUT_BATCH(0); /* write data */
+   ADVANCE_BATCH();
 }
 
 const struct brw_tracked_state gen6_vs_state = {

From 2d960d3f4e8fcf7819bb94ba4913ec0a2a723daa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Fri, 15 Jul 2011 20:16:20 +0200
Subject: [PATCH 027/600] g3dvl: remove unused vertex shader inputs

See also comments in the code.
---
 src/gallium/auxiliary/vl/vl_idct.c | 3 +--
 src/gallium/auxiliary/vl/vl_mc.c   | 5 +++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 645d06a0925..ad786145392 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -143,7 +143,7 @@ static void *
 create_mismatch_vert_shader(struct vl_idct *idct)
 {
    struct ureg_program *shader;
-   struct ureg_src vrect, vpos;
+   struct ureg_src vpos;
    struct ureg_src scale;
    struct ureg_dst t_tex;
    struct ureg_dst o_vpos, o_addr[2];
@@ -152,7 +152,6 @@ create_mismatch_vert_shader(struct vl_idct *idct)
    if (!shader)
       return NULL;
 
-   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
 
    t_tex = ureg_DECL_temporary(shader);
diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c
index add367e3ac6..81a05b539f3 100644
--- a/src/gallium/auxiliary/vl/vl_mc.c
+++ b/src/gallium/auxiliary/vl/vl_mc.c
@@ -103,7 +103,7 @@ create_ref_vert_shader(struct vl_mc *r)
 {
    struct ureg_program *shader;
    struct ureg_src mv_scale;
-   struct ureg_src vrect, vmv[2];
+   struct ureg_src vmv[2];
    struct ureg_dst t_vpos;
    struct ureg_dst o_vpos, o_vmv[2];
    unsigned i;
@@ -112,7 +112,6 @@ create_ref_vert_shader(struct vl_mc *r)
    if (!shader)
       return NULL;
 
-   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
    vmv[0] = ureg_DECL_vs_input(shader, VS_I_MV_TOP);
    vmv[1] = ureg_DECL_vs_input(shader, VS_I_MV_BOTTOM);
 
@@ -121,6 +120,8 @@ create_ref_vert_shader(struct vl_mc *r)
       (float)MACROBLOCK_HEIGHT / r->buffer_height)
    );
 
+   /* XXX The position is not written, which may lead to undefined rendering.
+    * XXX This is a serious bug. */
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
    o_vmv[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
    o_vmv[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM);

From 1ad3ba4ad954b86751bd5b6ad0a431920bff9958 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Fri, 8 Jul 2011 10:34:38 -0700
Subject: [PATCH 028/600] glsl: Make prototype_string publicly available

Also clarify the documentation for one of the parameters.

Reviewed-by: Paul Berry <stereotype441@gmail.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/ast_function.cpp | 6 ++++--
 src/glsl/ir.h             | 4 ++++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
index 60a2c617f70..bdb73f48706 100644
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -62,8 +62,10 @@ process_parameters(exec_list *instructions, exec_list *actual_parameters,
  *
  * \param return_type Return type of the function.  May be \c NULL.
  * \param name        Name of the function.
- * \param parameters  Parameter list for the function.  This may be either a
- *                    formal or actual parameter list.  Only the type is used.
+ * \param parameters  List of \c ir_instruction nodes representing the
+ *                    parameter list for the function.  This may be either a
+ *                    formal (\c ir_variable) or actual (\c ir_rvalue)
+ *                    parameter list.  Only the type is used.
  *
  * \return
  * A ralloced string representing the prototype of the function.
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 80ad3dd295e..9f277380c7d 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1669,4 +1669,8 @@ ir_has_call(ir_instruction *ir);
 extern void
 do_set_program_inouts(exec_list *instructions, struct gl_program *prog);
 
+extern char *
+prototype_string(const glsl_type *return_type, const char *name,
+		 exec_list *parameters);
+
 #endif /* IR_H */

From 02c5ae1b3fef75d5c0a715313a69e6b95ebd5b95 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 11 Jul 2011 10:46:01 -0700
Subject: [PATCH 029/600] glsl: Reject shaders that contain static recursion

The GLSL 1.20 and later specs say:

    "Recursion is not allowed, not even statically. Static recursion is
    present if the static function call graph of the program contains
    cycles."

Recursion is detected and rejected both a compile-time and at
link-time.  The complie-time check happens to detect some cases that
may be removed by various optimization passes.  The spec doesn't seem
to allow this, but other vendors (e.g., NVIDIA) appear to only check
at link-time after all optimizations.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=33885
Reviewed-by: Paul Berry <stereotype441@gmail.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/Makefile                         |   1 +
 src/glsl/ast_to_hir.cpp                   |   2 +
 src/glsl/ir.h                             |  26 ++
 src/glsl/ir_function_detect_recursion.cpp | 371 ++++++++++++++++++++++
 src/glsl/linker.cpp                       |   4 +
 5 files changed, 404 insertions(+)
 create mode 100644 src/glsl/ir_function_detect_recursion.cpp

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index e0776c1b55d..d1422c2a4d6 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -39,6 +39,7 @@ CXX_SOURCES = \
 	ir.cpp \
 	ir_expression_flattening.cpp \
 	ir_function_can_inline.cpp \
+	ir_function_detect_recursion.cpp \
 	ir_function.cpp \
 	ir_hierarchical_visitor.cpp \
 	ir_hv_accept.cpp \
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 2312c297c40..c0524bf0bcc 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -83,6 +83,8 @@ _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
 
    foreach_list_typed (ast_node, ast, link, & state->translation_unit)
       ast->hir(instructions, state);
+
+   detect_recursion_unlinked(state, instructions);
 }
 
 
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 9f277380c7d..50a9d6e1991 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1635,6 +1635,32 @@ visit_exec_list(exec_list *list, ir_visitor *visitor);
  */
 void validate_ir_tree(exec_list *instructions);
 
+struct _mesa_glsl_parse_state;
+struct gl_shader_program;
+
+/**
+ * Detect whether an unlinked shader contains static recursion
+ *
+ * If the list of instructions is determined to contain static recursion,
+ * \c _mesa_glsl_error will be called to emit error messages for each function
+ * that is in the recursion cycle.
+ */
+void
+detect_recursion_unlinked(struct _mesa_glsl_parse_state *state,
+			  exec_list *instructions);
+
+/**
+ * Detect whether a linked shader contains static recursion
+ *
+ * If the list of instructions is determined to contain static recursion,
+ * \c link_error_printf will be called to emit error messages for each function
+ * that is in the recursion cycle.  In addition,
+ * \c gl_shader_program::LinkStatus will be set to false.
+ */
+void
+detect_recursion_linked(struct gl_shader_program *prog,
+			exec_list *instructions);
+
 /**
  * Make a clone of each IR instruction in a list
  *
diff --git a/src/glsl/ir_function_detect_recursion.cpp b/src/glsl/ir_function_detect_recursion.cpp
new file mode 100644
index 00000000000..44a1cd0b950
--- /dev/null
+++ b/src/glsl/ir_function_detect_recursion.cpp
@@ -0,0 +1,371 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_function_detect_recursion.cpp
+ * Determine whether a shader contains static recursion.
+ *
+ * Consider the (possibly disjoint) graph of function calls in a shader.  If a
+ * program contains recursion, this graph will contain a cycle.  If a function
+ * is part of a cycle, it will have a caller and it will have a callee (it
+ * calls another function).
+ *
+ * To detect recursion, the function call graph is constructed.  The graph is
+ * repeatedly reduced by removing any function that either has no callees
+ * (leaf functions) or has no caller.  Eventually the only functions that
+ * remain will be the functions in the cycles.
+ *
+ * The GLSL spec is a bit wishy-washy about recursion.
+ *
+ * From page 39 (page 45 of the PDF) of the GLSL 1.10 spec:
+ *
+ *     "Behavior is undefined if recursion is used. Recursion means having any
+ *     function appearing more than once at any one time in the run-time stack
+ *     of function calls. That is, a function may not call itself either
+ *     directly or indirectly. Compilers may give diagnostic messages when
+ *     this is detectable at compile time, but not all such cases can be
+ *     detected at compile time."
+ *
+ * From page 79 (page 85 of the PDF):
+ *
+ *     "22) Should recursion be supported?
+ *
+ *      DISCUSSION: Probably not necessary, but another example of limiting
+ *      the language based on how it would directly map to hardware. One
+ *      thought is that recursion would benefit ray tracing shaders. On the
+ *      other hand, many recursion operations can also be implemented with the
+ *      user managing the recursion through arrays. RenderMan doesn't support
+ *      recursion. This could be added at a later date, if it proved to be
+ *      necessary.
+ *
+ *      RESOLVED on September 10, 2002: Implementations are not required to
+ *      support recursion.
+ *
+ *      CLOSED on September 10, 2002."
+ *
+ * From page 79 (page 85 of the PDF):
+ *
+ *     "56) Is it an error for an implementation to support recursion if the
+ *     specification says recursion is not supported?
+ *
+ *     ADDED on September 10, 2002.
+ *
+ *     DISCUSSION: This issues is related to Issue (22). If we say that
+ *     recursion (or some other piece of functionality) is not supported, is
+ *     it an error for an implementation to support it? Perhaps the
+ *     specification should remain silent on these kind of things so that they
+ *     could be gracefully added later as an extension or as part of the
+ *     standard.
+ *
+ *     RESOLUTION: Languages, in general, have programs that are not
+ *     well-formed in ways a compiler cannot detect. Portability is only
+ *     ensured for well-formed programs. Detecting recursion is an example of
+ *     this. The language will say a well-formed program may not recurse, but
+ *     compilers are not forced to detect that recursion may happen.
+ *
+ *     CLOSED: November 29, 2002."
+ *
+ * In GLSL 1.10 the behavior of recursion is undefined.  Compilers don't have
+ * to reject shaders (at compile-time or link-time) that contain recursion.
+ * Instead they could work, or crash, or kill a kitten.
+ *
+ * From page 44 (page 50 of the PDF) of the GLSL 1.20 spec:
+ *
+ *     "Recursion is not allowed, not even statically. Static recursion is
+ *     present if the static function call graph of the program contains
+ *     cycles."
+ *
+ * This langauge clears things up a bit, but it still leaves a lot of
+ * questions unanswered.
+ *
+ *     - Is the error generated at compile-time or link-time?
+ *
+ *     - Is it an error to have a recursive function that is never statically
+ *       called by main or any function called directly or indirectly by main?
+ *       Technically speaking, such a function is not in the "static function
+ *       call graph of the program" at all.
+ *
+ * \bug
+ * If a shader has multiple cycles, this algorithm may erroneously complain
+ * about functions that aren't in any cycle, but are in the part of the call
+ * tree that connects them.  For example, if the call graph consists of a
+ * cycle between A and B, and a cycle between D and E, and B also calls C
+ * which calls D, then this algorithm will report C as a function which "has
+ * static recursion" even though it is not part of any cycle.
+ *
+ * A better algorithm for cycle detection that doesn't have this drawback can
+ * be found here:
+ *
+ * http://en.wikipedia.org/wiki/Tarjan%E2%80%99s_strongly_connected_components_algorithm
+ *
+ * \author Ian Romanick <ian.d.romanick@intel.com>
+ */
+#include "main/core.h"
+#include "ir.h"
+#include "glsl_parser_extras.h"
+#include "linker.h"
+#include "program/hash_table.h"
+
+struct call_node : public exec_node {
+   class function *func;
+};
+
+class function {
+public:
+   function(ir_function_signature *sig)
+      : sig(sig)
+   {
+      /* empty */
+   }
+
+
+   /* Callers of this ralloc-based new need not call delete. It's
+    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      node = ralloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   /* If the user *does* call delete, that's OK, we will just
+    * ralloc_free in that case. */
+   static void operator delete(void *node)
+   {
+      ralloc_free(node);
+   }
+
+   ir_function_signature *sig;
+
+   /** List of functions called by this function. */
+   exec_list callees;
+
+   /** List of functions that call this function. */
+   exec_list callers;
+};
+
+class has_recursion_visitor : public ir_hierarchical_visitor {
+public:
+   has_recursion_visitor()
+      : current(NULL)
+   {
+      this->mem_ctx = ralloc_context(NULL);
+      this->function_hash = hash_table_ctor(0, hash_table_pointer_hash,
+					    hash_table_pointer_compare);
+   }
+
+   ~has_recursion_visitor()
+   {
+      hash_table_dtor(this->function_hash);
+      ralloc_free(this->mem_ctx);
+   }
+
+   function *get_function(ir_function_signature *sig)
+   {
+      function *f = (function *) hash_table_find(this->function_hash, sig);
+      if (f == NULL) {
+	 f = new(mem_ctx) function(sig);
+	 hash_table_insert(this->function_hash, f, sig);
+      }
+
+      return f;
+   }
+
+   virtual ir_visitor_status visit_enter(ir_function_signature *sig)
+   {
+      this->current = this->get_function(sig);
+      return visit_continue;
+   }
+
+   virtual ir_visitor_status visit_leave(ir_function_signature *sig)
+   {
+      (void) sig;
+      this->current = NULL;
+      return visit_continue;
+   }
+
+   virtual ir_visitor_status visit_enter(ir_call *call)
+   {
+      /* At global scope this->current will be NULL.  Since there is no way to
+       * call global scope, it can never be part of a cycle.  Don't bother
+       * adding calls from global scope to the graph.
+       */
+      if (this->current == NULL)
+	 return visit_continue;
+
+      function *const target = this->get_function(call->get_callee());
+
+      /* Create a link from the caller to the callee.
+       */
+      call_node *node = new(mem_ctx) call_node;
+      node->func = target;
+      this->current->callees.push_tail(node);
+
+      /* Create a link from the callee to the caller.
+       */
+      node = new(mem_ctx) call_node;
+      node->func = this->current;
+      target->callers.push_tail(node);
+      return visit_continue;
+   }
+
+   function *current;
+   struct hash_table *function_hash;
+   void *mem_ctx;
+   bool progress;
+};
+
+static void
+destroy_links(exec_list *list, function *f)
+{
+   foreach_list_safe(node, list) {
+      struct call_node *n = (struct call_node *) node;
+
+      /* If this is the right function, remove it.  Note that the loop cannot
+       * terminate now.  There can be multiple links to a function if it is
+       * either called multiple times or calls multiple times.
+       */
+      if (n->func == f)
+	 n->remove();
+   }
+}
+
+
+/**
+ * Remove a function if it has either no in or no out links
+ */
+static void
+remove_unlinked_functions(const void *key, void *data, void *closure)
+{
+   has_recursion_visitor *visitor = (has_recursion_visitor *) closure;
+   function *f = (function *) data;
+
+   if (f->callers.is_empty() || f->callees.is_empty()) {
+      while (!f->callers.is_empty()) {
+	 struct call_node *n = (struct call_node *) f->callers.pop_head();
+	 destroy_links(& n->func->callees, f);
+      }
+
+      while (!f->callees.is_empty()) {
+	 struct call_node *n = (struct call_node *) f->callees.pop_head();
+	 destroy_links(& n->func->callers, f);
+      }
+
+      hash_table_remove(visitor->function_hash, key);
+      visitor->progress = true;
+   }
+}
+
+
+static void
+emit_errors_unlinked(const void *key, void *data, void *closure)
+{
+   struct _mesa_glsl_parse_state *state =
+      (struct _mesa_glsl_parse_state *) closure;
+   function *f = (function *) data;
+   YYLTYPE loc;
+
+   char *proto = prototype_string(f->sig->return_type,
+				  f->sig->function_name(),
+				  &f->sig->parameters);
+
+   memset(&loc, 0, sizeof(loc));
+   _mesa_glsl_error(&loc, state,
+		    "function `%s' has static recursion.",
+		    proto);
+   ralloc_free(proto);
+}
+
+
+static void
+emit_errors_linked(const void *key, void *data, void *closure)
+{
+   struct gl_shader_program *prog =
+      (struct gl_shader_program *) closure;
+   function *f = (function *) data;
+
+   char *proto = prototype_string(f->sig->return_type,
+				  f->sig->function_name(),
+				  &f->sig->parameters);
+
+   linker_error_printf(prog,
+		       "function `%s' has static recursion.\n",
+		       proto);
+   ralloc_free(proto);
+   prog->LinkStatus = false;
+}
+
+
+void
+detect_recursion_unlinked(struct _mesa_glsl_parse_state *state,
+			  exec_list *instructions)
+{
+   has_recursion_visitor v;
+
+   /* Collect all of the information about which functions call which other
+    * functions.
+    */
+   v.run(instructions);
+
+   /* Remove from the set all of the functions that either have no caller or
+    * call no other functions.  Repeat until no functions are removed.
+    */
+   do {
+      v.progress = false;
+      hash_table_call_foreach(v.function_hash, remove_unlinked_functions, & v);
+   } while (v.progress);
+
+
+   /* At this point any functions still in the hash must be part of a cycle.
+    */
+   hash_table_call_foreach(v.function_hash, emit_errors_unlinked, state);
+}
+
+
+void
+detect_recursion_linked(struct gl_shader_program *prog,
+			exec_list *instructions)
+{
+   has_recursion_visitor v;
+
+   /* Collect all of the information about which functions call which other
+    * functions.
+    */
+   v.run(instructions);
+
+   /* Remove from the set all of the functions that either have no caller or
+    * call no other functions.  Repeat until no functions are removed.
+    */
+   do {
+      v.progress = false;
+      hash_table_call_foreach(v.function_hash, remove_unlinked_functions, & v);
+   } while (v.progress);
+
+
+   /* At this point any functions still in the hash must be part of a cycle.
+    */
+   hash_table_call_foreach(v.function_hash, emit_errors_linked, prog);
+}
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 5ec08446d16..fe570b6cc45 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1702,6 +1702,10 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
       if (prog->_LinkedShaders[i] == NULL)
 	 continue;
 
+      detect_recursion_linked(prog, prog->_LinkedShaders[i]->ir);
+      if (!prog->LinkStatus)
+	 goto done;
+
       while (do_common_optimization(prog->_LinkedShaders[i]->ir, true, 32))
 	 ;
    }

From 76bccaff0c54aed10ffbc7c7dc744f1708921409 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Wed, 20 Jul 2011 20:16:27 -0700
Subject: [PATCH 030/600] glsl: Add ir_function_detect_recursion.cpp to
 SConscript.

---
 src/glsl/SConscript | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/glsl/SConscript b/src/glsl/SConscript
index 1441cc74bd8..ea104abb823 100644
--- a/src/glsl/SConscript
+++ b/src/glsl/SConscript
@@ -50,6 +50,7 @@ glsl_sources = [
     'ir.cpp',
     'ir_expression_flattening.cpp',
     'ir_function_can_inline.cpp',
+    'ir_function_detect_recursion.cpp',
     'ir_function.cpp',
     'ir_hierarchical_visitor.cpp',
     'ir_hv_accept.cpp',

From ea316c5e060cbd92b34e0d794c0707d4ca79e6e8 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 21 Jul 2011 10:39:41 +0200
Subject: [PATCH 031/600] nouveau: hook up video decoding with nouveau_context

This doesn't include nvfx since its context struct is not derived
from common nouveau_context (yet).
---
 src/gallium/drivers/nouveau/Makefile          |  3 +-
 src/gallium/drivers/nouveau/nouveau_context.h |  3 ++
 src/gallium/drivers/nouveau/nouveau_screen.h  |  1 +
 src/gallium/drivers/nouveau/nouveau_video.c   | 39 +++++++++++++++++++
 src/gallium/drivers/nv50/nv50_context.c       |  2 +
 src/gallium/drivers/nv50/nv50_screen.c        |  2 +
 src/gallium/drivers/nvc0/nvc0_context.c       |  2 +
 src/gallium/drivers/nvc0/nvc0_screen.c        |  5 +++
 8 files changed, 56 insertions(+), 1 deletion(-)
 create mode 100644 src/gallium/drivers/nouveau/nouveau_video.c

diff --git a/src/gallium/drivers/nouveau/Makefile b/src/gallium/drivers/nouveau/Makefile
index 3210d1ff77b..aae6d9889bb 100644
--- a/src/gallium/drivers/nouveau/Makefile
+++ b/src/gallium/drivers/nouveau/Makefile
@@ -10,6 +10,7 @@ LIBRARY_INCLUDES = \
 C_SOURCES = nouveau_screen.c \
             nouveau_fence.c \
             nouveau_mm.c \
-            nouveau_buffer.c
+            nouveau_buffer.c \
+            nouveau_video.c
 
 include ../../Makefile.template
diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h
index 696e0d3f24e..19bf7c84ac7 100644
--- a/src/gallium/drivers/nouveau/nouveau_context.h
+++ b/src/gallium/drivers/nouveau/nouveau_context.h
@@ -23,4 +23,7 @@ nouveau_context(struct pipe_context *pipe)
    return (struct nouveau_context *)pipe;
 }
 
+void
+nouveau_context_init_vdec(struct nouveau_context *);
+
 #endif
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
index d910809a0ec..cf291c6c595 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -76,6 +76,7 @@ nouveau_screen_bo_from_handle(struct pipe_screen *pscreen,
 int nouveau_screen_init(struct nouveau_screen *, struct nouveau_device *);
 void nouveau_screen_fini(struct nouveau_screen *);
 
+void nouveau_screen_init_vdec(struct nouveau_screen *);
 
 
 #ifndef NOUVEAU_NVC0
diff --git a/src/gallium/drivers/nouveau/nouveau_video.c b/src/gallium/drivers/nouveau/nouveau_video.c
new file mode 100644
index 00000000000..32f038dae61
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_video.c
@@ -0,0 +1,39 @@
+
+#include "vl/vl_decoder.h"
+#include "vl/vl_video_buffer.h"
+
+#include "nouveau/nouveau_screen.h"
+#include "nouveau/nouveau_context.h"
+
+static int
+nouveau_screen_get_video_param(struct pipe_screen *pscreen,
+                               enum pipe_video_profile profile,
+                               enum pipe_video_cap param)
+{
+   switch (param) {
+   case PIPE_VIDEO_CAP_SUPPORTED:
+      return vl_profile_supported(pscreen, profile);
+   case PIPE_VIDEO_CAP_NPOT_TEXTURES:
+      return 1;
+   case PIPE_VIDEO_CAP_MAX_WIDTH:
+   case PIPE_VIDEO_CAP_MAX_HEIGHT:
+      return vl_video_buffer_max_size(pscreen);
+   default:
+      debug_printf("unknown video param: %d\n", param);
+      return 0;
+   }
+}
+
+void
+nouveau_screen_init_vdec(struct nouveau_screen *screen)
+{
+   screen->base.get_video_param = nouveau_screen_get_video_param;
+   screen->base.is_video_format_supported = vl_video_buffer_is_format_supported;
+}
+
+void
+nouveau_context_init_vdec(struct nouveau_context *nv)
+{
+   nv->pipe.create_video_decoder = vl_create_decoder;
+   nv->pipe.create_video_buffer = vl_video_buffer_create;
+}
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
index ac3e361a446..0d464063b5b 100644
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -149,6 +149,8 @@ nv50_create(struct pipe_screen *pscreen, void *priv)
    assert(nv50->draw);
    draw_set_rasterize_stage(nv50->draw, nv50_draw_render_stage(nv50));
 
+   nouveau_context_init_vdec(&nv50->base);
+
    return pipe;
 }
 
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index a697ff5ecf7..4139b85a9ae 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -315,6 +315,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 
    nv50_screen_init_resource_functions(pscreen);
 
+   nouveau_screen_init_vdec(&screen->base);
+
    ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096,
                         &screen->fence.bo);
    if (ret)
diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c
index 983db23eedb..360afbb943e 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nvc0/nvc0_context.c
@@ -150,6 +150,8 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
    assert(nvc0->draw);
    draw_set_rasterize_stage(nvc0->draw, nvc0_draw_render_stage(nvc0));
 
+   nouveau_context_init_vdec(&nvc0->base);
+
    return pipe;
 }
 
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
index 605a0b04018..5d1b324dbff 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -24,6 +24,9 @@
 #include "util/u_format_s3tc.h"
 #include "pipe/p_screen.h"
 
+#include "vl/vl_decoder.h"
+#include "vl/vl_video_buffer.h"
+
 #include "nvc0_context.h"
 #include "nvc0_screen.h"
 
@@ -373,6 +376,8 @@ nvc0_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 
    nvc0_screen_init_resource_functions(pscreen);
 
+   nouveau_screen_init_vdec(&screen->base);
+
    ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096,
                         &screen->fence.bo);
    if (ret)

From 000896c0bb99f356e52854608a29476d3ade387c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Tue, 19 Jul 2011 03:05:07 +0200
Subject: [PATCH 032/600] mesa: GLES2 should return different error enums for
 invalid fbo queries

ES 2.0.25 page 127 says:

  If the value of FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE is NONE, then
  querying any other pname will generate INVALID_ENUM.

See also:
b9e9df78a03edb35472c2e231aef4747e09db792

NOTE: This is a candidate for the 7.10 and 7.11 branches.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/main/fbobject.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 84969360d92..82eb7fb718d 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -2134,10 +2134,14 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
 {
    const struct gl_renderbuffer_attachment *att;
    struct gl_framebuffer *buffer;
+   GLenum err;
    GET_CURRENT_CONTEXT(ctx);
 
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
+   /* The error differs in GL andd GLES. */
+   err = ctx->API == API_OPENGL ? GL_INVALID_OPERATION : GL_INVALID_ENUM;
+
    buffer = get_framebuffer_target(ctx, target);
    if (!buffer) {
       _mesa_error(ctx, GL_INVALID_ENUM,
@@ -2188,7 +2192,12 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
       }
       else {
          assert(att->Type == GL_NONE);
-         *params = 0;
+         if (ctx->API == API_OPENGL) {
+            *params = 0;
+         } else {
+            _mesa_error(ctx, GL_INVALID_ENUM,
+                        "glGetFramebufferAttachmentParameterivEXT(pname)");
+         }
       }
       return;
    case GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL_EXT:
@@ -2196,7 +2205,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
 	 *params = att->TextureLevel;
       }
       else if (att->Type == GL_NONE) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
+         _mesa_error(ctx, err,
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else {
@@ -2214,7 +2223,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
          }
       }
       else if (att->Type == GL_NONE) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
+         _mesa_error(ctx, err,
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else {
@@ -2232,7 +2241,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
          }
       }
       else if (att->Type == GL_NONE) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
+         _mesa_error(ctx, err,
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else {
@@ -2246,7 +2255,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else if (att->Type == GL_NONE) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
+         _mesa_error(ctx, err,
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else {
@@ -2267,7 +2276,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
          return;
       }
       else if (att->Type == GL_NONE) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
+         _mesa_error(ctx, err,
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else {
@@ -2301,7 +2310,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment,
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else if (att->Type == GL_NONE) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
+         _mesa_error(ctx, err,
                      "glGetFramebufferAttachmentParameterivEXT(pname)");
       }
       else if (att->Texture) {

From afc160e1c8f87d0a76e41f1100d528d8ab82ecc4 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Wed, 20 Jul 2011 18:52:33 +0800
Subject: [PATCH 033/600] u_vbuf_mgr: restore buffer offsets

u_vbuf_upload_buffers modifies the buffer offsets.  If they are not
restored, and any of the vertex formats is not supported natively, the
next u_vbuf_mgr_draw_begin call will translate the vertex buffers with
incorrect buffer offsets.
---
 src/gallium/auxiliary/util/u_vbuf_mgr.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.c b/src/gallium/auxiliary/util/u_vbuf_mgr.c
index 374fc336b83..19eb689cfb2 100644
--- a/src/gallium/auxiliary/util/u_vbuf_mgr.c
+++ b/src/gallium/auxiliary/util/u_vbuf_mgr.c
@@ -79,6 +79,8 @@ struct u_vbuf_mgr_priv {
    void *saved_ve, *fallback_ve;
    boolean ve_binding_lock;
 
+   unsigned saved_buffer_offset[PIPE_MAX_ATTRIBS];
+
    boolean any_user_vbs;
    boolean incompatible_vb_layout;
 };
@@ -488,6 +490,7 @@ void u_vbuf_mgr_set_vertex_buffers(struct u_vbuf_mgr *mgrb,
 
       pipe_resource_reference(&mgr->b.vertex_buffer[i].buffer, vb->buffer);
       pipe_resource_reference(&mgr->b.real_vertex_buffer[i], NULL);
+      mgr->saved_buffer_offset[i] = vb->buffer_offset;
 
       if (!vb->buffer) {
          continue;
@@ -647,6 +650,13 @@ u_vbuf_mgr_draw_begin(struct u_vbuf_mgr *mgrb,
 void u_vbuf_mgr_draw_end(struct u_vbuf_mgr *mgrb)
 {
    struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb;
+   unsigned i;
+
+   /* buffer offsets were modified in u_vbuf_upload_buffers */
+   if (mgr->any_user_vbs) {
+      for (i = 0; i < mgr->b.nr_vertex_buffers; i++)
+         mgr->b.vertex_buffer[i].buffer_offset = mgr->saved_buffer_offset[i];
+   }
 
    if (mgr->fallback_ve) {
       u_vbuf_translate_end(mgr);

From f16d97feaa394826456e27250d5dfdb24df5cd57 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 21 Jul 2011 09:55:22 -0600
Subject: [PATCH 034/600] softpipe: rename a function

---
 src/gallium/drivers/softpipe/sp_quad_pipe.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c
index 2cfd02a22c6..addd47e2920 100644
--- a/src/gallium/drivers/softpipe/sp_quad_pipe.c
+++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c
@@ -30,9 +30,9 @@
 #include "sp_state.h"
 #include "pipe/p_shader_tokens.h"
 
+
 static void
-sp_push_quad_first( struct softpipe_context *sp,
-                    struct quad_stage *quad )
+insert_stage_at_head(struct softpipe_context *sp, struct quad_stage *quad)
 {
    quad->next = sp->quad.first;
    sp->quad.first = quad;
@@ -53,17 +53,17 @@ sp_build_quad_pipeline(struct softpipe_context *sp)
    sp->quad.first = sp->quad.blend;
 
    if (early_depth_test) {
-      sp_push_quad_first( sp, sp->quad.shade );
-      sp_push_quad_first( sp, sp->quad.depth_test );
+      insert_stage_at_head( sp, sp->quad.shade );
+      insert_stage_at_head( sp, sp->quad.depth_test );
    }
    else {
-      sp_push_quad_first( sp, sp->quad.depth_test );
-      sp_push_quad_first( sp, sp->quad.shade );
+      insert_stage_at_head( sp, sp->quad.depth_test );
+      insert_stage_at_head( sp, sp->quad.shade );
    }
 
 #if !DO_PSTIPPLE_IN_DRAW_MODULE
    if (sp->rasterizer->poly_stipple_enable)
-      sp_push_quad_first( sp, sp->quad.pstipple );
+      insert_stage_at_head( sp, sp->quad.pstipple );
 #endif
 }
 

From 9c1319d31d85f1e5bc61eef1bc963584623d0d51 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 21 Jul 2011 09:55:22 -0600
Subject: [PATCH 035/600] softpipe: remove obsolete comment

---
 src/gallium/drivers/softpipe/sp_fs_exec.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index 346e1b402ba..85e7141486a 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -193,10 +193,6 @@ softpipe_create_fs_exec(struct softpipe_context *softpipe,
 {
    struct sp_exec_fragment_shader *shader;
 
-   /* Decide whether we'll be codegenerating this shader and if so do
-    * that now.
-    */
-
    shader = CALLOC_STRUCT(sp_exec_fragment_shader);
    if (!shader)
       return NULL;

From 2253906da3c506bb5378a8f2fa203ed0c9021171 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 21 Jul 2011 09:55:22 -0600
Subject: [PATCH 036/600] tgsi: add info fields for fragcoord origin, center,
 etc

---
 src/gallium/auxiliary/tgsi/tgsi_scan.c | 38 +++++++++++++++++++-------
 src/gallium/auxiliary/tgsi/tgsi_scan.h |  3 ++
 2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 83c6ac75e54..f165f8240e6 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -200,19 +200,20 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
             info->file_max[file] = MAX2(info->file_max[file], (int)reg);
          }
          break;
+
       case TGSI_TOKEN_TYPE_PROPERTY:
-      {
-         const struct tgsi_full_property *fullprop
-            = &parse.FullToken.FullProperty;
+         {
+            const struct tgsi_full_property *fullprop
+               = &parse.FullToken.FullProperty;
 
-         info->properties[info->num_properties].name =
-            fullprop->Property.PropertyName;
-         memcpy(info->properties[info->num_properties].data,
-                fullprop->u, 8 * sizeof(unsigned));;
+            info->properties[info->num_properties].name =
+               fullprop->Property.PropertyName;
+            memcpy(info->properties[info->num_properties].data,
+                   fullprop->u, 8 * sizeof(unsigned));;
 
-         ++info->num_properties;
-      }
-      break;
+            ++info->num_properties;
+         }
+         break;
 
       default:
          assert( 0 );
@@ -222,6 +223,23 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
    info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] ||
                       info->opcode_count[TGSI_OPCODE_KILP]);
 
+   /* extract simple properties */
+   for (i = 0; i < info->num_properties; ++i) {
+      switch (info->properties[i].name) {
+      case TGSI_PROPERTY_FS_COORD_ORIGIN:
+         info->origin_lower_left = info->properties[i].data[0];
+         break;
+      case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
+         info->pixel_center_integer = info->properties[i].data[0];
+         break;
+      case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
+         info->color0_writes_all_cbufs = info->properties[i].data[0];
+         break;
+      default:
+         ;
+      }
+   }
+
    tgsi_parse_free (&parse);
 }
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index 53ab3d509dd..d6e593b3968 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -68,6 +68,9 @@ struct tgsi_shader_info
    boolean writes_edgeflag; /**< vertex shader outputs edgeflag */
    boolean uses_kill;  /**< KIL or KILP instruction used? */
    boolean uses_instanceid;
+   boolean origin_lower_left;
+   boolean pixel_center_integer;
+   boolean color0_writes_all_cbufs;
 
    /**
     * Bitmask indicating which register files are accessed with

From 4736c0ba8670637970a971da6ec83cf1d2620a32 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 21 Jul 2011 09:55:22 -0600
Subject: [PATCH 037/600] softpipe: use tgsi_shader_info fields for fragcoord
 origin, center, etc.

---
 src/gallium/drivers/softpipe/sp_quad_blend.c   | 2 +-
 src/gallium/drivers/softpipe/sp_setup.c        | 8 ++++----
 src/gallium/drivers/softpipe/sp_state.h        | 3 ---
 src/gallium/drivers/softpipe/sp_state_shader.c | 9 ---------
 4 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c
index 76cfc0bf51c..4a4e0022110 100644
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -797,7 +797,7 @@ blend_fallback(struct quad_stage *qs,
    unsigned cbuf;
    boolean write_all;
 
-   write_all = softpipe->fs->color0_writes_all_cbufs;
+   write_all = softpipe->fs->info.color0_writes_all_cbufs;
 
    for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) 
    {
diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c
index 0ce28f4c6ee..48f29f87661 100644
--- a/src/gallium/drivers/softpipe/sp_setup.c
+++ b/src/gallium/drivers/softpipe/sp_setup.c
@@ -570,15 +570,15 @@ setup_fragcoord_coeff(struct setup_context *setup, uint slot)
 {
    struct sp_fragment_shader* spfs = setup->softpipe->fs;
    /*X*/
-   setup->coef[slot].a0[0] = spfs->pixel_center_integer ? 0.0 : 0.5;
+   setup->coef[slot].a0[0] = spfs->info.pixel_center_integer ? 0.0 : 0.5;
    setup->coef[slot].dadx[0] = 1.0;
    setup->coef[slot].dady[0] = 0.0;
    /*Y*/
    setup->coef[slot].a0[1] =
-		   (spfs->origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0)
-		   + (spfs->pixel_center_integer ? 0.0 : 0.5);
+		   (spfs->info.origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0)
+		   + (spfs->info.pixel_center_integer ? 0.0 : 0.5);
    setup->coef[slot].dadx[1] = 0.0;
-   setup->coef[slot].dady[1] = spfs->origin_lower_left ? -1.0 : 1.0;
+   setup->coef[slot].dady[1] = spfs->info.origin_lower_left ? -1.0 : 1.0;
    /*Z*/
    setup->coef[slot].a0[2] = setup->posCoef.a0[2];
    setup->coef[slot].dadx[2] = setup->posCoef.dadx[2];
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index bb19f8cff20..6c14dd132e9 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -72,9 +72,6 @@ struct sp_fragment_shader {
 
    struct draw_fragment_shader *draw_shader;
 
-   boolean origin_lower_left; /**< fragment shader uses lower left position origin? */
-   boolean pixel_center_integer; /**< fragment shader uses integer pixel center? */
-   boolean color0_writes_all_cbufs; /**< fragment shader writes color0 to all bound cbufs */
    void (*prepare)( const struct sp_fragment_shader *shader,
 		    struct tgsi_exec_machine *machine,
 		    struct tgsi_sampler **samplers);
diff --git a/src/gallium/drivers/softpipe/sp_state_shader.c b/src/gallium/drivers/softpipe/sp_state_shader.c
index 3dec5de3cc4..80af2578839 100644
--- a/src/gallium/drivers/softpipe/sp_state_shader.c
+++ b/src/gallium/drivers/softpipe/sp_state_shader.c
@@ -73,15 +73,6 @@ softpipe_create_fs_state(struct pipe_context *pipe,
    /* get/save the summary info for this shader */
    tgsi_scan_shader(templ->tokens, &state->info);
 
-   for (i = 0; i < state->info.num_properties; ++i) {
-      if (state->info.properties[i].name == TGSI_PROPERTY_FS_COORD_ORIGIN)
-         state->origin_lower_left = state->info.properties[i].data[0];
-      else if (state->info.properties[i].name == TGSI_PROPERTY_FS_COORD_PIXEL_CENTER)
-	 state->pixel_center_integer = state->info.properties[i].data[0];
-      else if (state->info.properties[i].name == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS)
-	 state->color0_writes_all_cbufs = state->info.properties[i].data[0];
-   }
-
    return state;
 }
 

From 3dde6be908d827f4d6d54e0968ae83c2c4dfa87c Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 21 Jul 2011 09:55:22 -0600
Subject: [PATCH 038/600] util: assorted updates to polygon stipple helper

---
 src/gallium/auxiliary/util/u_pstipple.c | 43 +++++++++++++++++++------
 1 file changed, 33 insertions(+), 10 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_pstipple.c b/src/gallium/auxiliary/util/u_pstipple.c
index f79a6938d1d..ac0df8c1a9c 100644
--- a/src/gallium/auxiliary/util/u_pstipple.c
+++ b/src/gallium/auxiliary/util/u_pstipple.c
@@ -52,6 +52,7 @@
 
 #include "tgsi/tgsi_transform.h"
 #include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_scan.h"
 
 /** Approx number of new tokens for instructions in pstip_transform_inst() */
 #define NUM_NEW_TOKENS 50
@@ -175,6 +176,7 @@ util_pstipple_create_sampler(struct pipe_context *pipe)
  */
 struct pstip_transform_context {
    struct tgsi_transform_context base;
+   struct tgsi_shader_info info;
    uint tempsUsed;  /**< bitmask */
    int wincoordInput;
    int maxInput;
@@ -183,12 +185,13 @@ struct pstip_transform_context {
    int texTemp;  /**< temp registers */
    int numImmed;
    boolean firstInstruction;
+   uint coordOrigin;
 };
 
 
 /**
  * TGSI declaration transform callback.
- * Look for a free sampler, a free input attrib, and two free temp regs.
+ * Track samplers used, temps used, inputs used.
  */
 static void
 pstip_transform_decl(struct tgsi_transform_context *ctx,
@@ -197,10 +200,11 @@ pstip_transform_decl(struct tgsi_transform_context *ctx,
    struct pstip_transform_context *pctx =
       (struct pstip_transform_context *) ctx;
 
+   /* XXX we can use tgsi_shader_info instead of some of this */
+
    if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
       uint i;
-      for (i = decl->Range.First;
-           i <= decl->Range.Last; i++) {
+      for (i = decl->Range.First; i <= decl->Range.Last; i++) {
          pctx->samplersUsed |= 1 << i;
       }
    }
@@ -211,8 +215,7 @@ pstip_transform_decl(struct tgsi_transform_context *ctx,
    }
    else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
       uint i;
-      for (i = decl->Range.First;
-           i <= decl->Range.Last; i++) {
+      for (i = decl->Range.First; i <= decl->Range.Last; i++) {
          pctx->tempsUsed |= (1 << i);
       }
    }
@@ -243,8 +246,16 @@ free_bit(uint bitfield)
 
 /**
  * TGSI instruction transform callback.
- * Replace writes to result.color w/ a temp reg.
- * Upon END instruction, insert texture sampling code for antialiasing.
+ * Before the first instruction, insert our new code to sample the
+ * stipple texture (using the fragment coord register) then kill the
+ * fragment if the stipple texture bit is off.
+ *
+ * Insert:
+ *   declare new registers
+ *   MUL texTemp, INPUT[wincoord], 1/32;
+ *   TEX texTemp, texTemp, sampler;
+ *   KIL -texTemp;   # if -texTemp < 0, KILL fragment
+ *   [...original code...]
  */
 static void
 pstip_transform_inst(struct tgsi_transform_context *ctx,
@@ -261,7 +272,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
       uint i;
       int wincoordInput;
 
-      /* find free sampler */
+      /* find free texture sampler */
       pctx->freeSampler = free_bit(pctx->samplersUsed);
       if (pctx->freeSampler >= PIPE_MAX_SAMPLERS)
          pctx->freeSampler = PIPE_MAX_SAMPLERS - 1;
@@ -271,7 +282,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
       else
          wincoordInput = pctx->wincoordInput;
 
-      /* find one free temp reg */
+      /* find one free temp register */
       for (i = 0; i < 32; i++) {
          if ((pctx->tempsUsed & (1 << i)) == 0) {
             /* found a free temp */
@@ -397,6 +408,7 @@ util_pstipple_create_fragment_shader(struct pipe_context *pipe,
    struct pipe_shader_state *new_fs;
    struct pstip_transform_context transform;
    const uint newLen = tgsi_num_tokens(fs->tokens) + NUM_NEW_TOKENS;
+   unsigned i;
 
    new_fs = MALLOC(sizeof(*new_fs));
    if (!new_fs)
@@ -408,22 +420,33 @@ util_pstipple_create_fragment_shader(struct pipe_context *pipe,
       return NULL;
    }
 
+   /* Setup shader transformation info/context.
+    */
    memset(&transform, 0, sizeof(transform));
    transform.wincoordInput = -1;
    transform.maxInput = -1;
    transform.texTemp = -1;
    transform.firstInstruction = TRUE;
+   transform.coordOrigin = TGSI_FS_COORD_ORIGIN_UPPER_LEFT;
    transform.base.transform_instruction = pstip_transform_inst;
    transform.base.transform_declaration = pstip_transform_decl;
    transform.base.transform_immediate = pstip_transform_immed;
 
+   tgsi_scan_shader(fs->tokens, &transform.info);
+
+   /* find fragment coordinate origin property */
+   for (i = 0; i < transform.info.num_properties; i++) {
+      if (transform.info.properties[i].name == TGSI_PROPERTY_FS_COORD_ORIGIN)
+         transform.coordOrigin = transform.info.properties[i].data[0];
+   }
+
    tgsi_transform_shader(fs->tokens,
                          (struct tgsi_token *) new_fs->tokens,
                          newLen, &transform.base);
 
 #if 0 /* DEBUG */
    tgsi_dump(fs->tokens, 0);
-   tgsi_dump(pstip_fs.tokens, 0);
+   tgsi_dump(new_fs->tokens, 0);
 #endif
 
    assert(transform.freeSampler < PIPE_MAX_SAMPLERS);

From c534f11164bbecf25eb2b1e697f9511eceb0c86f Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 21 Jul 2011 09:55:22 -0600
Subject: [PATCH 039/600] softpipe: implement fragment shader variants

We'll need shader variants to accomodate the new polygon stipple utility.
---
 src/gallium/drivers/softpipe/sp_context.h     |   1 +
 src/gallium/drivers/softpipe/sp_fs.h          |  16 ++-
 src/gallium/drivers/softpipe/sp_fs_exec.c     |  36 +++---
 src/gallium/drivers/softpipe/sp_fs_sse.c      |  25 ++--
 src/gallium/drivers/softpipe/sp_quad_blend.c  |   2 +-
 .../drivers/softpipe/sp_quad_depth_test.c     |  10 +-
 src/gallium/drivers/softpipe/sp_quad_fs.c     |  10 +-
 src/gallium/drivers/softpipe/sp_quad_pipe.c   |   6 +-
 src/gallium/drivers/softpipe/sp_setup.c       |  37 +++---
 src/gallium/drivers/softpipe/sp_state.h       |  54 ++++++---
 .../drivers/softpipe/sp_state_derived.c       |  54 +++++++--
 .../drivers/softpipe/sp_state_sampler.c       |   3 +-
 .../drivers/softpipe/sp_state_shader.c        | 114 +++++++++++++++---
 13 files changed, 251 insertions(+), 117 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index a572ee8cf00..79291abca97 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -64,6 +64,7 @@ struct softpipe_context {
    struct pipe_depth_stencil_alpha_state *depth_stencil;
    struct pipe_rasterizer_state *rasterizer;
    struct sp_fragment_shader *fs;
+   struct sp_fragment_shader_variant *fs_variant;
    struct sp_vertex_shader *vs;
    struct sp_geometry_shader *gs;
    struct sp_velems_state *velems;
diff --git a/src/gallium/drivers/softpipe/sp_fs.h b/src/gallium/drivers/softpipe/sp_fs.h
index 4792ace3a33..d46d7d5a657 100644
--- a/src/gallium/drivers/softpipe/sp_fs.h
+++ b/src/gallium/drivers/softpipe/sp_fs.h
@@ -31,17 +31,15 @@
 #ifndef SP_FS_H
 #define SP_FS_H
 
-struct sp_fragment_shader *
-softpipe_create_fs_exec(struct softpipe_context *softpipe,
-		       const struct pipe_shader_state *templ);
 
-struct sp_fragment_shader *
-softpipe_create_fs_sse(struct softpipe_context *softpipe,
-		       const struct pipe_shader_state *templ);
+struct sp_fragment_shader_variant *
+softpipe_create_fs_variant_exec(struct softpipe_context *softpipe,
+                                const struct pipe_shader_state *templ);
+
+struct sp_fragment_shader_variant *
+softpipe_create_fs_variant_sse(struct softpipe_context *softpipe,
+                               const struct pipe_shader_state *templ);
 
-struct sp_fragment_shader *
-softpipe_create_fs_llvm(struct softpipe_context *softpipe,
-			const struct pipe_shader_state *templ);
 
 struct tgsi_interp_coef;
 struct tgsi_exec_vector;
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index 85e7141486a..779b8c4995c 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -42,25 +42,25 @@
 
 
 /**
- * Subclass of sp_fragment_shader
+ * Subclass of sp_fragment_shader_variant
  */
 struct sp_exec_fragment_shader
 {
-   struct sp_fragment_shader base;
+   struct sp_fragment_shader_variant base;
    /* No other members for now */
 };
 
 
 /** cast wrapper */
 static INLINE struct sp_exec_fragment_shader *
-sp_exec_fragment_shader(const struct sp_fragment_shader *base)
+sp_exec_fragment_shader(const struct sp_fragment_shader_variant *var)
 {
-   return (struct sp_exec_fragment_shader *) base;
+   return (struct sp_exec_fragment_shader *) var;
 }
 
 
 static void
-exec_prepare( const struct sp_fragment_shader *base,
+exec_prepare( const struct sp_fragment_shader_variant *var,
 	      struct tgsi_exec_machine *machine,
 	      struct tgsi_sampler **samplers )
 {
@@ -68,9 +68,9 @@ exec_prepare( const struct sp_fragment_shader *base,
     * Bind tokens/shader to the interpreter's machine state.
     * Avoid redundant binding.
     */
-   if (machine->Tokens != base->shader.tokens) {
+   if (machine->Tokens != var->tokens) {
       tgsi_exec_machine_bind_shader( machine,
-                                     base->shader.tokens,
+                                     var->tokens,
                                      PIPE_MAX_SAMPLERS,
                                      samplers );
    }
@@ -118,7 +118,7 @@ setup_pos_vector(const struct tgsi_interp_coef *coef,
  * interface:
  */
 static unsigned 
-exec_run( const struct sp_fragment_shader *base,
+exec_run( const struct sp_fragment_shader_variant *var,
 	  struct tgsi_exec_machine *machine,
 	  struct quad_header *quad )
 {
@@ -136,9 +136,9 @@ exec_run( const struct sp_fragment_shader *base,
 
    /* store outputs */
    {
-      const ubyte *sem_name = base->info.output_semantic_name;
-      const ubyte *sem_index = base->info.output_semantic_index;
-      const uint n = base->info.num_outputs;
+      const ubyte *sem_name = var->info.output_semantic_name;
+      const ubyte *sem_index = var->info.output_semantic_index;
+      const uint n = var->info.num_outputs;
       uint i;
       for (i = 0; i < n; i++) {
          switch (sem_name[i]) {
@@ -180,16 +180,16 @@ exec_run( const struct sp_fragment_shader *base,
 
 
 static void 
-exec_delete( struct sp_fragment_shader *base )
+exec_delete( struct sp_fragment_shader_variant *var )
 {
-   FREE((void *) base->shader.tokens);
-   FREE(base);
+   FREE( (void *) var->tokens );
+   FREE(var);
 }
 
 
-struct sp_fragment_shader *
-softpipe_create_fs_exec(struct softpipe_context *softpipe,
-			const struct pipe_shader_state *templ)
+struct sp_fragment_shader_variant *
+softpipe_create_fs_variant_exec(struct softpipe_context *softpipe,
+                                const struct pipe_shader_state *templ)
 {
    struct sp_exec_fragment_shader *shader;
 
@@ -197,8 +197,6 @@ softpipe_create_fs_exec(struct softpipe_context *softpipe,
    if (!shader)
       return NULL;
 
-   /* we need to keep a local copy of the tokens */
-   shader->base.shader.tokens = tgsi_dup_tokens(templ->tokens);
    shader->base.prepare = exec_prepare;
    shader->base.run = exec_run;
    shader->base.delete = exec_delete;
diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
index 5b18cd035e3..c873af125bd 100644
--- a/src/gallium/drivers/softpipe/sp_fs_sse.c
+++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
@@ -48,11 +48,11 @@
 
 
 /**
- * Subclass of sp_fragment_shader
+ * Subclass of sp_fragment_shader_variant
  */
 struct sp_sse_fragment_shader
 {
-   struct sp_fragment_shader base;
+   struct sp_fragment_shader_variant base;
    struct x86_function sse2_program;
    tgsi_sse2_fs_function func;
    float immediates[TGSI_EXEC_NUM_IMMEDIATES][4];
@@ -61,14 +61,14 @@ struct sp_sse_fragment_shader
 
 /** cast wrapper */
 static INLINE struct sp_sse_fragment_shader *
-sp_sse_fragment_shader(const struct sp_fragment_shader *base)
+sp_sse_fragment_shader(const struct sp_fragment_shader_variant *base)
 {
    return (struct sp_sse_fragment_shader *) base;
 }
 
 
 static void
-fs_sse_prepare( const struct sp_fragment_shader *base,
+fs_sse_prepare( const struct sp_fragment_shader_variant *base,
 		struct tgsi_exec_machine *machine,
 		struct tgsi_sampler **samplers )
 {
@@ -119,7 +119,7 @@ setup_pos_vector(const struct tgsi_interp_coef *coef,
  * TODO: process >1 quad at a time
  */
 static unsigned 
-fs_sse_run( const struct sp_fragment_shader *base,
+fs_sse_run( const struct sp_fragment_shader_variant *base,
 	    struct tgsi_exec_machine *machine,
 	    struct quad_header *quad )
 {
@@ -189,7 +189,7 @@ fs_sse_run( const struct sp_fragment_shader *base,
 
 
 static void 
-fs_sse_delete( struct sp_fragment_shader *base )
+fs_sse_delete( struct sp_fragment_shader_variant *base )
 {
    struct sp_sse_fragment_shader *shader = sp_sse_fragment_shader(base);
 
@@ -198,9 +198,9 @@ fs_sse_delete( struct sp_fragment_shader *base )
 }
 
 
-struct sp_fragment_shader *
-softpipe_create_fs_sse(struct softpipe_context *softpipe,
-		       const struct pipe_shader_state *templ)
+struct sp_fragment_shader_variant *
+softpipe_create_fs_variant_sse(struct softpipe_context *softpipe,
+                               const struct pipe_shader_state *templ)
 {
    struct sp_sse_fragment_shader *shader;
 
@@ -226,7 +226,6 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe,
       return NULL;
    }
 
-   shader->base.shader.tokens = NULL; /* don't hold reference to templ->tokens */
    shader->base.prepare = fs_sse_prepare;
    shader->base.run = fs_sse_run;
    shader->base.delete = fs_sse_delete;
@@ -239,9 +238,9 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe,
 
 /* Maybe put this variant in the header file.
  */
-struct sp_fragment_shader *
-softpipe_create_fs_sse(struct softpipe_context *softpipe,
-		       const struct pipe_shader_state *templ)
+struct sp_fragment_shader_variant *
+softpipe_create_fs_variant_sse(struct softpipe_context *softpipe,
+                               const struct pipe_shader_state *templ)
 {
    return NULL;
 }
diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c
index 4a4e0022110..04bfd14b7c6 100644
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -797,7 +797,7 @@ blend_fallback(struct quad_stage *qs,
    unsigned cbuf;
    boolean write_all;
 
-   write_all = softpipe->fs->info.color0_writes_all_cbufs;
+   write_all = softpipe->fs_variant->info.color0_writes_all_cbufs;
 
    for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) 
    {
diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
index 89b2a91fc1f..9e98801810d 100644
--- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c
+++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
@@ -726,9 +726,9 @@ depth_test_quads_fallback(struct quad_stage *qs,
                           unsigned nr)
 {
    unsigned i, pass = 0;
-   const struct sp_fragment_shader *fs = qs->softpipe->fs;
-   boolean interp_depth = !fs->info.writes_z;
-   boolean shader_stencil_ref = fs->info.writes_stencil;
+   const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
+   boolean interp_depth = !fsInfo->writes_z;
+   boolean shader_stencil_ref = fsInfo->writes_stencil;
    struct depth_data data;
 
    data.use_shader_stencil_refs = FALSE;
@@ -837,7 +837,9 @@ choose_depth_test(struct quad_stage *qs,
                   struct quad_header *quads[],
                   unsigned nr)
 {
-   boolean interp_depth = !qs->softpipe->fs->info.writes_z;
+   const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
+
+   boolean interp_depth = !fsInfo->writes_z;
 
    boolean alpha = qs->softpipe->depth_stencil->alpha.enabled;
 
diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c
index 90f4787d599..d74d6d4914e 100644
--- a/src/gallium/drivers/softpipe/sp_quad_fs.c
+++ b/src/gallium/drivers/softpipe/sp_quad_fs.c
@@ -74,7 +74,7 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad)
    struct tgsi_exec_machine *machine = softpipe->fs_machine;
 
    /* run shader */
-   return softpipe->fs->run( softpipe->fs, machine, quad );
+   return softpipe->fs_variant->run( softpipe->fs_variant, machine, quad );
 }
 
 
@@ -140,10 +140,10 @@ shade_begin(struct quad_stage *qs)
 {
    struct softpipe_context *softpipe = qs->softpipe;
 
-   softpipe->fs->prepare( softpipe->fs, 
-			  softpipe->fs_machine,
-			  (struct tgsi_sampler **)
-                             softpipe->tgsi.frag_samplers_list );
+   softpipe->fs_variant->prepare( softpipe->fs_variant, 
+                                  softpipe->fs_machine,
+                                  (struct tgsi_sampler **)
+                                  softpipe->tgsi.frag_samplers_list );
 
    qs->next->begin(qs->next);
 }
diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c
index addd47e2920..a98f8b7bde5 100644
--- a/src/gallium/drivers/softpipe/sp_quad_pipe.c
+++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c
@@ -46,9 +46,9 @@ sp_build_quad_pipeline(struct softpipe_context *sp)
       sp->depth_stencil->depth.enabled &&
       sp->framebuffer.zsbuf &&
       !sp->depth_stencil->alpha.enabled &&
-      !sp->fs->info.uses_kill &&
-      !sp->fs->info.writes_z &&
-      !sp->fs->info.writes_stencil;
+      !sp->fs_variant->info.uses_kill &&
+      !sp->fs_variant->info.writes_z &&
+      !sp->fs_variant->info.writes_stencil;
 
    sp->quad.first = sp->quad.blend;
 
diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c
index 48f29f87661..b82594ca2a5 100644
--- a/src/gallium/drivers/softpipe/sp_setup.c
+++ b/src/gallium/drivers/softpipe/sp_setup.c
@@ -568,17 +568,18 @@ tri_persp_coeff(struct setup_context *setup,
 static void
 setup_fragcoord_coeff(struct setup_context *setup, uint slot)
 {
-   struct sp_fragment_shader* spfs = setup->softpipe->fs;
+   const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info;
+
    /*X*/
-   setup->coef[slot].a0[0] = spfs->info.pixel_center_integer ? 0.0 : 0.5;
+   setup->coef[slot].a0[0] = fsInfo->pixel_center_integer ? 0.0 : 0.5;
    setup->coef[slot].dadx[0] = 1.0;
    setup->coef[slot].dady[0] = 0.0;
    /*Y*/
    setup->coef[slot].a0[1] =
-		   (spfs->info.origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0)
-		   + (spfs->info.pixel_center_integer ? 0.0 : 0.5);
+		   (fsInfo->origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0)
+		   + (fsInfo->pixel_center_integer ? 0.0 : 0.5);
    setup->coef[slot].dadx[1] = 0.0;
-   setup->coef[slot].dady[1] = spfs->info.origin_lower_left ? -1.0 : 1.0;
+   setup->coef[slot].dady[1] = fsInfo->origin_lower_left ? -1.0 : 1.0;
    /*Z*/
    setup->coef[slot].a0[2] = setup->posCoef.a0[2];
    setup->coef[slot].dadx[2] = setup->posCoef.dadx[2];
@@ -599,7 +600,7 @@ static void
 setup_tri_coefficients(struct setup_context *setup)
 {
    struct softpipe_context *softpipe = setup->softpipe;
-   const struct sp_fragment_shader *spfs = softpipe->fs;
+   const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info;
    const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
    uint fragSlot;
    float v[3];
@@ -618,7 +619,7 @@ setup_tri_coefficients(struct setup_context *setup)
 
    /* setup interpolation for all the remaining attributes:
     */
-   for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
+   for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) {
       const uint vertSlot = vinfo->attrib[fragSlot].src_index;
       uint j;
 
@@ -632,7 +633,7 @@ setup_tri_coefficients(struct setup_context *setup)
             tri_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
                                        setup->vmid[vertSlot][j],
                                        setup->vmax[vertSlot][j],
-                                       spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j),
+                                       fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j),
                                        v);
             tri_linear_coeff(setup, &setup->coef[fragSlot], j, v);
          }
@@ -642,7 +643,7 @@ setup_tri_coefficients(struct setup_context *setup)
             tri_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
                                        setup->vmid[vertSlot][j],
                                        setup->vmax[vertSlot][j],
-                                       spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j),
+                                       fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j),
                                        v);
             tri_persp_coeff(setup, &setup->coef[fragSlot], j, v);
          }
@@ -654,7 +655,7 @@ setup_tri_coefficients(struct setup_context *setup)
          assert(0);
       }
 
-      if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
+      if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
          /* convert 0 to 1.0 and 1 to -1.0 */
          setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f;
          setup->coef[fragSlot].dadx[0] = 0.0;
@@ -939,7 +940,7 @@ setup_line_coefficients(struct setup_context *setup,
                         const float (*v1)[4])
 {
    struct softpipe_context *softpipe = setup->softpipe;
-   const struct sp_fragment_shader *spfs = softpipe->fs;
+   const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info;
    const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe);
    uint fragSlot;
    float area;
@@ -974,7 +975,7 @@ setup_line_coefficients(struct setup_context *setup,
 
    /* setup interpolation for all the remaining attributes:
     */
-   for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
+   for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) {
       const uint vertSlot = vinfo->attrib[fragSlot].src_index;
       uint j;
 
@@ -987,7 +988,7 @@ setup_line_coefficients(struct setup_context *setup,
          for (j = 0; j < NUM_CHANNELS; j++) {
             line_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
                                         setup->vmax[vertSlot][j],
-                                        spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j),
+                                        fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j),
                                         v);
             line_linear_coeff(setup, &setup->coef[fragSlot], j, v);
          }
@@ -996,7 +997,7 @@ setup_line_coefficients(struct setup_context *setup,
          for (j = 0; j < NUM_CHANNELS; j++) {
             line_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
                                         setup->vmax[vertSlot][j],
-                                        spfs->info.input_cylindrical_wrap[fragSlot] & (1 << j),
+                                        fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j),
                                         v);
             line_persp_coeff(setup, &setup->coef[fragSlot], j, v);
          }
@@ -1008,7 +1009,7 @@ setup_line_coefficients(struct setup_context *setup,
          assert(0);
       }
 
-      if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
+      if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
          /* convert 0 to 1.0 and 1 to -1.0 */
          setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f;
          setup->coef[fragSlot].dadx[0] = 0.0;
@@ -1188,7 +1189,7 @@ sp_setup_point(struct setup_context *setup,
                const float (*v0)[4])
 {
    struct softpipe_context *softpipe = setup->softpipe;
-   const struct sp_fragment_shader *spfs = softpipe->fs;
+   const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info;
    const int sizeAttr = setup->softpipe->psize_slot;
    const float size
       = sizeAttr > 0 ? v0[sizeAttr][0]
@@ -1232,7 +1233,7 @@ sp_setup_point(struct setup_context *setup,
    const_coeff(setup, &setup->posCoef, 0, 2);
    const_coeff(setup, &setup->posCoef, 0, 3);
 
-   for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
+   for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) {
       const uint vertSlot = vinfo->attrib[fragSlot].src_index;
       uint j;
 
@@ -1255,7 +1256,7 @@ sp_setup_point(struct setup_context *setup,
          assert(0);
       }
 
-      if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
+      if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
          /* convert 0 to 1.0 and 1 to -1.0 */
          setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f;
          setup->coef[fragSlot].dadx[0] = 0.0;
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index 6c14dd132e9..243f7aab8ba 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -60,31 +60,43 @@ struct tgsi_exec_machine;
 struct vertex_info;
 
 
-/**
- * Subclass of pipe_shader_state (though it doesn't really need to be).
- *
- * This is starting to look an awful lot like a quad pipeline stage...
- */
-struct sp_fragment_shader {
-   struct pipe_shader_state shader;
+struct sp_fragment_shader_variant_key
+{
+   int foo;  /* XXX temporary */
+};
 
+
+struct sp_fragment_shader_variant
+{
+   const struct tgsi_token *tokens;
+   struct sp_fragment_shader_variant_key key;
    struct tgsi_shader_info info;
 
+   /* See comments about this elsewhere */
+#if 0
    struct draw_fragment_shader *draw_shader;
+#endif
 
-   void (*prepare)( const struct sp_fragment_shader *shader,
-		    struct tgsi_exec_machine *machine,
-		    struct tgsi_sampler **samplers);
+   void (*prepare)(const struct sp_fragment_shader_variant *shader,
+		   struct tgsi_exec_machine *machine,
+		   struct tgsi_sampler **samplers);
 
-   /* Run the shader - this interface will get cleaned up in the
-    * future:
-    */
-   unsigned (*run)( const struct sp_fragment_shader *shader,
-		    struct tgsi_exec_machine *machine,
-		    struct quad_header *quad );
+   unsigned (*run)(const struct sp_fragment_shader_variant *shader,
+		   struct tgsi_exec_machine *machine,
+		   struct quad_header *quad);
+
+   /* Deletes this instance of the object */
+   void (*delete)(struct sp_fragment_shader_variant *shader);
+
+   struct sp_fragment_shader_variant *next;
+};
 
 
-   void (*delete)( struct sp_fragment_shader * );
+/** Subclass of pipe_shader_state */
+struct sp_fragment_shader {
+   struct pipe_shader_state shader;
+   struct sp_fragment_shader_variant *variants;
+   struct draw_fragment_shader *draw_shader;
 };
 
 
@@ -138,7 +150,7 @@ softpipe_set_framebuffer_state(struct pipe_context *,
                                const struct pipe_framebuffer_state *);
 
 void
-softpipe_update_derived( struct softpipe_context *softpipe );
+softpipe_update_derived(struct softpipe_context *softpipe);
 
 void
 softpipe_draw_vbo(struct pipe_context *pipe,
@@ -167,4 +179,10 @@ struct vertex_info *
 softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe);
 
 
+struct sp_fragment_shader_variant *
+softpipe_find_fs_variant(struct softpipe_context *softpipe,
+                         struct sp_fragment_shader *fs,
+                         const struct sp_fragment_shader_variant_key *key);
+
+
 #endif
diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c
index f9590eb0b24..583d0bd9f7b 100644
--- a/src/gallium/drivers/softpipe/sp_state_derived.c
+++ b/src/gallium/drivers/softpipe/sp_state_derived.c
@@ -64,7 +64,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
 
    if (vinfo->num_attribs == 0) {
       /* compute vertex layout now */
-      const struct sp_fragment_shader *spfs = softpipe->fs;
+      const struct tgsi_shader_info *fsInfo = &softpipe->fs_variant->info;
       struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf;
       const uint num = draw_num_shader_outputs(softpipe->draw);
       uint i;
@@ -84,11 +84,11 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
        * from the vertex shader.
        */
       vinfo->num_attribs = 0;
-      for (i = 0; i < spfs->info.num_inputs; i++) {
+      for (i = 0; i < fsInfo->num_inputs; i++) {
          int src;
          enum interp_mode interp;
 
-         switch (spfs->info.input_interpolate[i]) {
+         switch (fsInfo->input_interpolate[i]) {
          case TGSI_INTERPOLATE_CONSTANT:
             interp = INTERP_CONSTANT;
             break;
@@ -103,7 +103,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
             interp = INTERP_LINEAR;
          }
 
-         switch (spfs->info.input_semantic_name[i]) {
+         switch (fsInfo->input_semantic_name[i]) {
          case TGSI_SEMANTIC_POSITION:
             interp = INTERP_POS;
             break;
@@ -117,8 +117,8 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
 
          /* this includes texcoords and varying vars */
          src = draw_find_shader_output(softpipe->draw,
-                                       spfs->info.input_semantic_name[i],
-                                       spfs->info.input_semantic_index[i]);
+                                       fsInfo->input_semantic_name[i],
+                                       fsInfo->input_semantic_index[i]);
          draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
       }
 
@@ -241,10 +241,46 @@ update_tgsi_samplers( struct softpipe_context *softpipe )
 }
 
 
+static void
+update_fragment_shader(struct softpipe_context *softpipe)
+{
+   struct sp_fragment_shader_variant_key key;
+
+   memset(&key, 0, sizeof(key));
+
+   if (softpipe->fs) {
+      softpipe->fs_variant = softpipe_find_fs_variant(softpipe,
+                                                      softpipe->fs, &key);
+   }
+   else {
+      softpipe->fs_variant = NULL;
+   }
+
+   /* This would be the logical place to pass the fragment shader
+    * to the draw module.  However, doing this here, during state
+    * validation, causes problems with the 'draw' module helpers for
+    * wide/AA/stippled lines.
+    * In principle, the draw's fragment shader should be per-variant
+    * but that doesn't work.  So we use a single draw fragment shader
+    * per fragment shader, not per variant.
+    */
+#if 0
+   if (softpipe->fs_variant) {
+      draw_bind_fragment_shader(softpipe->draw,
+                                softpipe->fs_variant->draw_shader);
+   }
+   else {
+      draw_bind_fragment_shader(softpipe->draw, NULL);
+   }
+#endif
+}
+
+
 /* Hopefully this will remain quite simple, otherwise need to pull in
  * something like the state tracker mechanism.
  */
-void softpipe_update_derived( struct softpipe_context *softpipe )
+void
+softpipe_update_derived(struct softpipe_context *softpipe)
 {
    struct softpipe_screen *sp_screen = softpipe_screen(softpipe->pipe.screen);
 
@@ -255,6 +291,10 @@ void softpipe_update_derived( struct softpipe_context *softpipe )
       softpipe->dirty |= SP_NEW_TEXTURE;
    }
       
+   if (softpipe->dirty & (SP_NEW_RASTERIZER |
+                          SP_NEW_FS))
+      update_fragment_shader(softpipe);
+
    if (softpipe->dirty & (SP_NEW_SAMPLER |
                           SP_NEW_TEXTURE |
                           SP_NEW_FS | 
diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c
index 60331bc4976..16023c990a7 100644
--- a/src/gallium/drivers/softpipe/sp_state_sampler.c
+++ b/src/gallium/drivers/softpipe/sp_state_sampler.c
@@ -373,8 +373,9 @@ softpipe_reset_sampler_variants(struct softpipe_context *softpipe)
       }
    }
 
-   for (i = 0; i <= softpipe->fs->info.file_max[TGSI_FILE_SAMPLER]; i++) {
+   for (i = 0; i <= softpipe->fs_variant->info.file_max[TGSI_FILE_SAMPLER]; i++) {
       if (softpipe->fragment_samplers[i]) {
+         assert(softpipe->fragment_sampler_views[i]->texture);
          softpipe->tgsi.frag_samplers_list[i] =
             get_sampler_variant( i,
                                  sp_sampler(softpipe->fragment_samplers[i]),
diff --git a/src/gallium/drivers/softpipe/sp_state_shader.c b/src/gallium/drivers/softpipe/sp_state_shader.c
index 80af2578839..ddb9a98b45f 100644
--- a/src/gallium/drivers/softpipe/sp_state_shader.c
+++ b/src/gallium/drivers/softpipe/sp_state_shader.c
@@ -42,37 +42,91 @@
 #include "tgsi/tgsi_parse.h"
 
 
+/**
+ * Create a new fragment shader variant.
+ */
+static struct sp_fragment_shader_variant *
+create_fs_variant(struct softpipe_context *softpipe,
+                  struct sp_fragment_shader *fs,
+                  const struct sp_fragment_shader_variant_key *key)
+{
+   struct sp_fragment_shader_variant *var;
+   struct pipe_shader_state *curfs = &fs->shader;
+
+   /* codegen, create variant object */
+   var = softpipe_create_fs_variant_sse(softpipe, curfs);
+   if (!var) {
+      var = softpipe_create_fs_variant_exec(softpipe, curfs);
+   }
+
+   if (var) {
+      var->key = *key;
+      var->tokens = tgsi_dup_tokens(curfs->tokens);
+
+      tgsi_scan_shader(var->tokens, &var->info);
+
+      /* See comments elsewhere about draw fragment shaders */
+#if 0
+      /* draw's fs state */
+      var->draw_shader = draw_create_fragment_shader(softpipe->draw,
+                                                     &fs->shader);
+      if (!var->draw_shader) {
+         var->delete(var);
+         FREE((void *) var->tokens);
+         return NULL;
+      }
+#endif
+
+      /* insert variant into linked list */
+      var->next = fs->variants;
+      fs->variants = var;
+   }
+
+   return var;
+}
+
+
+struct sp_fragment_shader_variant *
+softpipe_find_fs_variant(struct softpipe_context *sp,
+                         struct sp_fragment_shader *fs,
+                         const struct sp_fragment_shader_variant_key *key)
+{
+   struct sp_fragment_shader_variant *var;
+
+   for (var = fs->variants; var; var = var->next) {
+      if (memcmp(&var->key, key, sizeof(*key)) == 0) {
+         /* found it */
+         return var;
+      }
+   }
+
+   return create_fs_variant(sp, fs, key);
+}
+
+
 static void *
 softpipe_create_fs_state(struct pipe_context *pipe,
                          const struct pipe_shader_state *templ)
 {
    struct softpipe_context *softpipe = softpipe_context(pipe);
-   struct sp_fragment_shader *state;
-   unsigned i;
+   struct sp_fragment_shader *state = CALLOC_STRUCT(sp_fragment_shader);
 
    /* debug */
    if (softpipe->dump_fs) 
       tgsi_dump(templ->tokens, 0);
 
-   /* codegen */
-   state = softpipe_create_fs_sse( softpipe, templ );
-   if (!state) {
-      state = softpipe_create_fs_exec( softpipe, templ );
-   }
-
-   if (!state)
-      return NULL;
+   /* we need to keep a local copy of the tokens */
+   state->shader.tokens = tgsi_dup_tokens(templ->tokens);
 
    /* draw's fs state */
-   state->draw_shader = draw_create_fragment_shader(softpipe->draw, templ);
+   state->draw_shader = draw_create_fragment_shader(softpipe->draw,
+                                                    &state->shader);
    if (!state->draw_shader) {
-      state->delete( state );
+      FREE((void *) state->shader.tokens);
+      FREE(state);
       return NULL;
    }
 
-   /* get/save the summary info for this shader */
-   tgsi_scan_shader(templ->tokens, &state->info);
-
    return state;
 }
 
@@ -81,6 +135,7 @@ static void
 softpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
 {
    struct softpipe_context *softpipe = softpipe_context(pipe);
+   struct sp_fragment_shader *state = (struct sp_fragment_shader *) fs;
 
    if (softpipe->fs == fs)
       return;
@@ -89,8 +144,14 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
 
    softpipe->fs = fs;
 
-   draw_bind_fragment_shader(softpipe->draw,
-                             (softpipe->fs ? softpipe->fs->draw_shader : NULL));
+   if (fs == NULL)
+      softpipe->fs_variant = NULL;
+
+   if (state)
+      draw_bind_fragment_shader(softpipe->draw,
+                                state->draw_shader);
+   else
+      draw_bind_fragment_shader(softpipe->draw, NULL);
 
    softpipe->dirty |= SP_NEW_FS;
 }
@@ -101,8 +162,9 @@ softpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
 {
    struct softpipe_context *softpipe = softpipe_context(pipe);
    struct sp_fragment_shader *state = fs;
+   struct sp_fragment_shader_variant *var, *next_var;
 
-   assert(fs != softpipe_context(pipe)->fs);
+   assert(fs != softpipe->fs);
 
    if (softpipe->fs_machine->Tokens == state->shader.tokens) {
       /* unbind the shader from the tgsi executor if we're
@@ -111,9 +173,23 @@ softpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
       tgsi_exec_machine_bind_shader(softpipe->fs_machine, NULL, 0, NULL);
    }
 
+   /* delete variants */
+   for (var = state->variants; var; var = next_var) {
+      next_var = var->next;
+
+      assert(var != softpipe->fs_variant);
+
+      /* See comments elsewhere about draw fragment shaders */
+#if 0
+      draw_delete_fragment_shader(softpipe->draw, var->draw_shader);
+#endif
+
+      var->delete(var);
+   }
+
    draw_delete_fragment_shader(softpipe->draw, state->draw_shader);
 
-   state->delete( state );
+   FREE((void *) state->shader.tokens);
 }
 
 

From 57aa597b3d5dac0fc59c05557dafec59e14e1019 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 21 Jul 2011 09:55:22 -0600
Subject: [PATCH 040/600] softpipe: use the polygon stipple utility module

This is an alternative to the draw module's polygon stipple stage.
The softpipe implementation here is just a test.  The advantange of
using the new polygon stipple utility module (with other drivers)
is we can avoid software vertex processing in the draw module and
get much better performance.
Polygon stipple doesn't require special vertex processing like
the other draw module stage.
---
 src/gallium/drivers/softpipe/sp_clear.c       |  2 +-
 src/gallium/drivers/softpipe/sp_context.c     | 14 ++++
 src/gallium/drivers/softpipe/sp_context.h     | 14 +++-
 src/gallium/drivers/softpipe/sp_draw_arrays.c |  4 +-
 src/gallium/drivers/softpipe/sp_quad_pipe.c   |  2 +-
 src/gallium/drivers/softpipe/sp_setup.c       |  2 +-
 src/gallium/drivers/softpipe/sp_state.h       | 12 ++-
 .../drivers/softpipe/sp_state_derived.c       | 78 ++++++++++++++++++-
 .../drivers/softpipe/sp_state_shader.c        | 17 +++-
 9 files changed, 131 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c
index ae3f00f3387..22e8a2e5817 100644
--- a/src/gallium/drivers/softpipe/sp_clear.c
+++ b/src/gallium/drivers/softpipe/sp_clear.c
@@ -60,7 +60,7 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
       return;
 
 #if 0
-   softpipe_update_derived(softpipe); /* not needed?? */
+   softpipe_update_derived(softpipe, PIPE_PRIM_TRIANGLES); /* not needed?? */
 #endif
 
    if (buffers & PIPE_CLEAR_COLOR) {
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index ce22f646228..eabf2dae3fc 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -35,6 +35,7 @@
 #include "pipe/p_defines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
+#include "util/u_pstipple.h"
 #include "util/u_inlines.h"
 #include "tgsi/tgsi_exec.h"
 #include "sp_clear.h"
@@ -88,6 +89,14 @@ softpipe_destroy( struct pipe_context *pipe )
    struct softpipe_context *softpipe = softpipe_context( pipe );
    uint i;
 
+#if DO_PSTIPPLE_IN_HELPER_MODULE
+   if (softpipe->pstipple.sampler)
+      pipe->delete_sampler_state(pipe, softpipe->pstipple.sampler);
+
+   pipe_resource_reference(&softpipe->pstipple.texture, NULL);
+   pipe_sampler_view_reference(&softpipe->pstipple.sampler_view, NULL);
+#endif
+
    if (softpipe->draw)
       draw_destroy( softpipe->draw );
 
@@ -341,6 +350,11 @@ softpipe_create_context( struct pipe_screen *screen,
 
    sp_init_surface_functions(softpipe);
 
+#if DO_PSTIPPLE_IN_HELPER_MODULE
+   /* create the polgon stipple sampler */
+   softpipe->pstipple.sampler = util_pstipple_create_sampler(&softpipe->pipe);
+#endif
+
    return &softpipe->pipe;
 
  fail:
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index 79291abca97..410b0a65792 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -38,8 +38,11 @@
 #include "sp_quad_pipe.h"
 
 
-/** Do polygon stipple in the driver here, or in the draw module? */
-#define DO_PSTIPPLE_IN_DRAW_MODULE 1
+/** Do polygon stipple in the draw module? */
+#define DO_PSTIPPLE_IN_DRAW_MODULE 0
+
+/** Do polygon stipple with the util module? */
+#define DO_PSTIPPLE_IN_HELPER_MODULE 1
 
 
 struct softpipe_vbuf_render;
@@ -144,6 +147,13 @@ struct softpipe_context {
    struct pipe_query *render_cond_query;
    uint render_cond_mode;
 
+   /** Polygon stipple items */
+   struct {
+      struct pipe_resource *texture;
+      struct pipe_sampler_state *sampler;
+      struct pipe_sampler_view *sampler_view;
+   } pstipple;
+
    /** Software quad rendering pipeline */
    struct {
       struct quad_stage *shade;
diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c
index 01b4ca985d0..69b5b96b4fd 100644
--- a/src/gallium/drivers/softpipe/sp_draw_arrays.c
+++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c
@@ -64,7 +64,7 @@ softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode)
    sp->reduced_api_prim = u_reduced_prim(mode);
 
    if (sp->dirty) {
-      softpipe_update_derived(sp);
+      softpipe_update_derived(sp, sp->reduced_api_prim);
    }
 
    softpipe_map_transfers(sp);
@@ -122,7 +122,7 @@ softpipe_draw_vbo(struct pipe_context *pipe,
    sp->reduced_api_prim = u_reduced_prim(info->mode);
 
    if (sp->dirty) {
-      softpipe_update_derived(sp);
+      softpipe_update_derived(sp, sp->reduced_api_prim);
    }
 
    softpipe_map_transfers(sp);
diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c
index a98f8b7bde5..0c4506ae8f4 100644
--- a/src/gallium/drivers/softpipe/sp_quad_pipe.c
+++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c
@@ -61,7 +61,7 @@ sp_build_quad_pipeline(struct softpipe_context *sp)
       insert_stage_at_head( sp, sp->quad.shade );
    }
 
-#if !DO_PSTIPPLE_IN_DRAW_MODULE
+#if !DO_PSTIPPLE_IN_DRAW_MODULE && !DO_PSTIPPLE_IN_HELPER_MODULE
    if (sp->rasterizer->poly_stipple_enable)
       insert_stage_at_head( sp, sp->quad.pstipple );
 #endif
diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c
index b82594ca2a5..656d001809f 100644
--- a/src/gallium/drivers/softpipe/sp_setup.c
+++ b/src/gallium/drivers/softpipe/sp_setup.c
@@ -1397,7 +1397,7 @@ sp_setup_prepare(struct setup_context *setup)
    struct softpipe_context *sp = setup->softpipe;
 
    if (sp->dirty) {
-      softpipe_update_derived(sp);
+      softpipe_update_derived(sp, sp->reduced_api_prim);
    }
 
    /* Note: nr_attrs is only used for debugging (vertex printing) */
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index 243f7aab8ba..ec4c8cf5e4d 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -62,7 +62,7 @@ struct vertex_info;
 
 struct sp_fragment_shader_variant_key
 {
-   int foo;  /* XXX temporary */
+   boolean polygon_stipple;
 };
 
 
@@ -72,6 +72,8 @@ struct sp_fragment_shader_variant
    struct sp_fragment_shader_variant_key key;
    struct tgsi_shader_info info;
 
+   unsigned stipple_sampler_unit;
+
    /* See comments about this elsewhere */
 #if 0
    struct draw_fragment_shader *draw_shader;
@@ -150,7 +152,7 @@ softpipe_set_framebuffer_state(struct pipe_context *,
                                const struct pipe_framebuffer_state *);
 
 void
-softpipe_update_derived(struct softpipe_context *softpipe);
+softpipe_update_derived(struct softpipe_context *softpipe, unsigned prim);
 
 void
 softpipe_draw_vbo(struct pipe_context *pipe,
@@ -179,6 +181,12 @@ struct vertex_info *
 softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe);
 
 
+struct sp_fragment_shader_variant *
+softpipe_find_fs_variant(struct softpipe_context *softpipe,
+                         struct sp_fragment_shader *fs,
+                         const struct sp_fragment_shader_variant_key *key);
+
+
 struct sp_fragment_shader_variant *
 softpipe_find_fs_variant(struct softpipe_context *softpipe,
                          struct sp_fragment_shader *fs,
diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c
index 583d0bd9f7b..fd688089a3e 100644
--- a/src/gallium/drivers/softpipe/sp_state_derived.c
+++ b/src/gallium/drivers/softpipe/sp_state_derived.c
@@ -25,8 +25,10 @@
  * 
  **************************************************************************/
 
+#include "util/u_inlines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
+#include "util/u_pstipple.h"
 #include "pipe/p_shader_tokens.h"
 #include "draw/draw_context.h"
 #include "draw/draw_vertex.h"
@@ -242,12 +244,15 @@ update_tgsi_samplers( struct softpipe_context *softpipe )
 
 
 static void
-update_fragment_shader(struct softpipe_context *softpipe)
+update_fragment_shader(struct softpipe_context *softpipe, unsigned prim)
 {
    struct sp_fragment_shader_variant_key key;
 
    memset(&key, 0, sizeof(key));
 
+   if (prim == PIPE_PRIM_TRIANGLES)
+      key.polygon_stipple = softpipe->rasterizer->poly_stipple_enable;
+
    if (softpipe->fs) {
       softpipe->fs_variant = softpipe_find_fs_variant(softpipe,
                                                       softpipe->fs, &key);
@@ -276,11 +281,63 @@ update_fragment_shader(struct softpipe_context *softpipe)
 }
 
 
+/**
+ * This should be called when the polygon stipple pattern changes.
+ * We create a new texture from the stipple pattern and create a new
+ * sampler view.
+ */
+static void
+update_polygon_stipple_pattern(struct softpipe_context *softpipe)
+{
+   struct pipe_resource *tex;
+   struct pipe_sampler_view *view;
+
+   tex = util_pstipple_create_stipple_texture(&softpipe->pipe,
+                                              softpipe->poly_stipple.stipple);
+   pipe_resource_reference(&softpipe->pstipple.texture, tex);
+
+   view = util_pstipple_create_sampler_view(&softpipe->pipe, tex);
+   pipe_sampler_view_reference(&softpipe->pstipple.sampler_view, view);
+}
+
+
+/**
+ * Should be called when polygon stipple is enabled/disabled or when
+ * the fragment shader changes.
+ * We add/update the fragment sampler and sampler views to sample from
+ * the polygon stipple texture.  The texture unit that we use depends on
+ * the fragment shader (we need to use a unit not otherwise used by the
+ * shader).
+ */
+static void
+update_polygon_stipple_enable(struct softpipe_context *softpipe, unsigned prim)
+{
+   if (prim == PIPE_PRIM_TRIANGLES &&
+       softpipe->fs_variant->key.polygon_stipple) {
+      const unsigned unit = softpipe->fs_variant->stipple_sampler_unit;
+
+      assert(unit >= softpipe->num_fragment_samplers);
+
+      /* sampler state */
+      softpipe->fragment_samplers[unit] = softpipe->pstipple.sampler;
+
+      /* sampler view */
+      pipe_sampler_view_reference(&softpipe->fragment_sampler_views[unit],
+                                  softpipe->pstipple.sampler_view);
+
+      sp_tex_tile_cache_set_sampler_view(softpipe->fragment_tex_cache[unit],
+                                         softpipe->pstipple.sampler_view);
+
+      softpipe->dirty |= SP_NEW_SAMPLER;
+   }
+}
+
+
 /* Hopefully this will remain quite simple, otherwise need to pull in
  * something like the state tracker mechanism.
  */
 void
-softpipe_update_derived(struct softpipe_context *softpipe)
+softpipe_update_derived(struct softpipe_context *softpipe, unsigned prim)
 {
    struct softpipe_screen *sp_screen = softpipe_screen(softpipe->pipe.screen);
 
@@ -290,10 +347,23 @@ softpipe_update_derived(struct softpipe_context *softpipe)
       softpipe->tex_timestamp = sp_screen->timestamp;
       softpipe->dirty |= SP_NEW_TEXTURE;
    }
-      
+
+#if DO_PSTIPPLE_IN_HELPER_MODULE
+   if (softpipe->dirty & SP_NEW_STIPPLE)
+      /* before updating samplers! */
+      update_polygon_stipple_pattern(softpipe);
+#endif
+
    if (softpipe->dirty & (SP_NEW_RASTERIZER |
                           SP_NEW_FS))
-      update_fragment_shader(softpipe);
+      update_fragment_shader(softpipe, prim);
+
+#if DO_PSTIPPLE_IN_HELPER_MODULE
+   if (softpipe->dirty & (SP_NEW_RASTERIZER |
+                          SP_NEW_STIPPLE |
+                          SP_NEW_FS))
+      update_polygon_stipple_enable(softpipe, prim);
+#endif
 
    if (softpipe->dirty & (SP_NEW_SAMPLER |
                           SP_NEW_TEXTURE |
diff --git a/src/gallium/drivers/softpipe/sp_state_shader.c b/src/gallium/drivers/softpipe/sp_state_shader.c
index ddb9a98b45f..da895270aa9 100644
--- a/src/gallium/drivers/softpipe/sp_state_shader.c
+++ b/src/gallium/drivers/softpipe/sp_state_shader.c
@@ -33,6 +33,7 @@
 #include "pipe/p_defines.h"
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
+#include "util/u_pstipple.h"
 #include "draw/draw_context.h"
 #include "draw/draw_vs.h"
 #include "draw/draw_gs.h"
@@ -51,7 +52,15 @@ create_fs_variant(struct softpipe_context *softpipe,
                   const struct sp_fragment_shader_variant_key *key)
 {
    struct sp_fragment_shader_variant *var;
-   struct pipe_shader_state *curfs = &fs->shader;
+   struct pipe_shader_state *stipple_fs = NULL, *curfs = &fs->shader;
+   unsigned unit = 0;
+
+   if (key->polygon_stipple) {
+      /* get new shader that implements polygon stippling */
+      stipple_fs = util_pstipple_create_fragment_shader(&softpipe->pipe,
+                                                        curfs, &unit);
+      curfs = stipple_fs;
+   }
 
    /* codegen, create variant object */
    var = softpipe_create_fs_variant_sse(softpipe, curfs);
@@ -62,6 +71,7 @@ create_fs_variant(struct softpipe_context *softpipe,
    if (var) {
       var->key = *key;
       var->tokens = tgsi_dup_tokens(curfs->tokens);
+      var->stipple_sampler_unit = unit;
 
       tgsi_scan_shader(var->tokens, &var->info);
 
@@ -82,6 +92,11 @@ create_fs_variant(struct softpipe_context *softpipe,
       fs->variants = var;
    }
 
+   if (stipple_fs) {
+      free((void *) stipple_fs->tokens);
+      free(stipple_fs);
+   }
+
    return var;
 }
 

From 50e32fefb1140a42101b1154d3df78db4906ee38 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Thu, 21 Jul 2011 22:31:24 +0200
Subject: [PATCH 041/600] configure.ac: check for libdrm_radeon only when
 building classic

---
 configure.ac | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/configure.ac b/configure.ac
index f72db119fb3..86ba87b39e8 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1073,11 +1073,6 @@ AC_SUBST([MESA_MODULES])
 
 AC_SUBST([HAVE_XF86VIDMODE])
 
-PKG_CHECK_MODULES([LIBDRM_RADEON],
-		  [libdrm_radeon >= $LIBDRM_RADEON_REQUIRED],
-		  HAVE_LIBDRM_RADEON=yes,
-		  HAVE_LIBDRM_RADEON=no)
-
 dnl
 dnl More GLX setup
 dnl
@@ -1270,6 +1265,11 @@ esac
 
 case $DRI_DIRS in
 *radeon*|*r200*|*r300*|*r600*)
+    PKG_CHECK_MODULES([LIBDRM_RADEON],
+		      [libdrm_radeon >= $LIBDRM_RADEON_REQUIRED],
+		      HAVE_LIBDRM_RADEON=yes,
+		      HAVE_LIBDRM_RADEON=no)
+
     if test "x$HAVE_LIBDRM_RADEON" = xyes; then
 	RADEON_CFLAGS="-DHAVE_LIBDRM_RADEON=1 $LIBDRM_RADEON_CFLAGS"
 	RADEON_LDFLAGS=$LIBDRM_RADEON_LIBS

From a87afba50529c6ae6762a3da68d4f31bc431e064 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Thu, 21 Jul 2011 13:53:34 -0400
Subject: [PATCH 042/600] Revert "g3dvl: Preserve previously rendered
 components for MC output."

This reverts commit b56daf71d2f63d044d4c53ab49c6f87e02991a28.

The bug is actually in softpipe's blend and writemask interaction.
---
 src/gallium/auxiliary/vl/vl_mc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c
index 81a05b539f3..e5ae0f72c4c 100644
--- a/src/gallium/auxiliary/vl/vl_mc.c
+++ b/src/gallium/auxiliary/vl/vl_mc.c
@@ -591,7 +591,7 @@ vl_mc_set_surface(struct vl_mc_buffer *buffer, struct pipe_surface *surface)
 }
 
 static void
-prepare_pipe_4_rendering(struct vl_mc_buffer *buffer, unsigned component, unsigned mask)
+prepare_pipe_4_rendering(struct vl_mc_buffer *buffer, unsigned mask)
 {
    struct vl_mc *renderer;
 
@@ -600,7 +600,7 @@ prepare_pipe_4_rendering(struct vl_mc_buffer *buffer, unsigned component, unsign
    renderer = buffer->renderer;
    renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
 
-   if (buffer->surface_cleared || component > 0)
+   if (buffer->surface_cleared)
       renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_add[mask]);
    else
       renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_clear[mask]);
@@ -616,7 +616,7 @@ vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref)
 
    assert(buffer && ref);
 
-   prepare_pipe_4_rendering(buffer, 0, PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B);
+   prepare_pipe_4_rendering(buffer, PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B);
 
    renderer = buffer->renderer;
 
@@ -644,7 +644,7 @@ vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned component, unsigned num
    if (num_instances == 0)
       return;
 
-   prepare_pipe_4_rendering(buffer, component, mask);
+   prepare_pipe_4_rendering(buffer, mask);
 
    renderer = buffer->renderer;
 

From 12c22cab77f35a887d9f6790e0de4a8fa4b3b575 Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Thu, 7 Jul 2011 13:03:45 -0700
Subject: [PATCH 043/600] mesa: Add an ifndef guard around the definition of
 the INLINE macro

Several Mesa headers redundantly define the INLINE macro.  Adding this
guard prevents the compiler from complaining about macro redefinition.

Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chad Versace <chad@chad-versace.us>
---
 src/mesa/main/compiler.h | 42 +++++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h
index 743841be4ef..d736fdfc58a 100644
--- a/src/mesa/main/compiler.h
+++ b/src/mesa/main/compiler.h
@@ -139,26 +139,28 @@ extern "C" {
 /**
  * Function inlining
  */
-#if defined(__GNUC__)
-#  define INLINE __inline__
-#elif defined(__MSC__)
-#  define INLINE __inline
-#elif defined(_MSC_VER)
-#  define INLINE __inline
-#elif defined(__ICL)
-#  define INLINE __inline
-#elif defined(__INTEL_COMPILER)
-#  define INLINE inline
-#elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
-#  define INLINE __inline
-#elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
-#  define INLINE inline
-#  define __inline inline
-#  define __inline__ inline
-#elif (__STDC_VERSION__ >= 199901L) /* C99 */
-#  define INLINE inline
-#else
-#  define INLINE
+#ifndef INLINE
+#  if defined(__GNUC__)
+#    define INLINE __inline__
+#  elif defined(__MSC__)
+#    define INLINE __inline
+#  elif defined(_MSC_VER)
+#    define INLINE __inline
+#  elif defined(__ICL)
+#    define INLINE __inline
+#  elif defined(__INTEL_COMPILER)
+#    define INLINE inline
+#  elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
+#    define INLINE __inline
+#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
+#    define INLINE inline
+#    define __inline inline
+#    define __inline__ inline
+#  elif (__STDC_VERSION__ >= 199901L) /* C99 */
+#    define INLINE inline
+#  else
+#    define INLINE
+#  endif
 #endif
 
 

From f129f618fe8a5397774484f1b7afb42d4be809a0 Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Thu, 7 Jul 2011 14:01:40 -0700
Subject: [PATCH 044/600] glsl: Move functions into standalone_scaffolding.cpp
 for later reuse.

This patch moves the following functions from main.cpp (the main cpp
file for the standalone executable that is used to create the built-in
functions) to standalone_scaffolding.cpp, so that they can be re-used
in other standalone executables:

- initialize_context()*
- _mesa_new_shader()
- _mesa_reference_shader()

*initialize_context contained some code that was specific to main.cpp,
so it was split into two functions: initialize_context() (which
remains in main.cpp), and initialize_context_from_defaults() (which is
in standalone_scaffolding.cpp).
---
 src/glsl/Makefile                   |  3 +-
 src/glsl/main.cpp                   | 60 +------------------
 src/glsl/standalone_scaffolding.cpp | 91 +++++++++++++++++++++++++++++
 src/glsl/standalone_scaffolding.h   | 54 +++++++++++++++++
 4 files changed, 150 insertions(+), 58 deletions(-)
 create mode 100644 src/glsl/standalone_scaffolding.cpp
 create mode 100644 src/glsl/standalone_scaffolding.h

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index d1422c2a4d6..edfb35eb0b8 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -95,7 +95,8 @@ GLSL2_C_SOURCES = \
 	../mesa/program/hash_table.c \
 	../mesa/program/symbol_table.c
 GLSL2_CXX_SOURCES = \
-	main.cpp
+	main.cpp \
+	standalone_scaffolding.cpp
 
 GLSL2_OBJECTS = \
 	$(GLSL2_C_SOURCES:.c=.o) \
diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp
index 7952bb1a3e3..9f85096e1a1 100644
--- a/src/glsl/main.cpp
+++ b/src/glsl/main.cpp
@@ -29,80 +29,26 @@
 #include "ir_print_visitor.h"
 #include "program.h"
 #include "loop_analysis.h"
-
-extern "C" struct gl_shader *
-_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type);
-
-extern "C" void
-_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
-                       struct gl_shader *sh);
-
-/* Copied from shader_api.c for the stand-alone compiler.
- */
-void
-_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
-                       struct gl_shader *sh)
-{
-   *ptr = sh;
-}
-
-struct gl_shader *
-_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type)
-{
-   struct gl_shader *shader;
-
-   (void) ctx;
-
-   assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER);
-   shader = rzalloc(NULL, struct gl_shader);
-   if (shader) {
-      shader->Type = type;
-      shader->Name = name;
-      shader->RefCount = 1;
-   }
-   return shader;
-}
+#include "standalone_scaffolding.h"
 
 static void
 initialize_context(struct gl_context *ctx, gl_api api)
 {
-   memset(ctx, 0, sizeof(*ctx));
-
-   ctx->API = api;
-
-   ctx->Extensions.ARB_ES2_compatibility = GL_TRUE;
-   ctx->Extensions.ARB_draw_buffers = GL_TRUE;
-   ctx->Extensions.ARB_draw_instanced = GL_TRUE;
-   ctx->Extensions.ARB_fragment_coord_conventions = GL_TRUE;
-   ctx->Extensions.EXT_texture_array = GL_TRUE;
-   ctx->Extensions.NV_texture_rectangle = GL_TRUE;
-   ctx->Extensions.EXT_texture3D = GL_TRUE;
+   initialize_context_to_defaults(ctx, api);
 
    /* GLSL 1.30 isn't fully supported, but we need to advertise 1.30 so that
     * the built-in functions for 1.30 can be built.
     */
    ctx->Const.GLSLVersion = 130;
 
-   /* 1.10 minimums. */
-   ctx->Const.MaxLights = 8;
    ctx->Const.MaxClipPlanes = 8;
-   ctx->Const.MaxTextureUnits = 2;
+   ctx->Const.MaxDrawBuffers = 2;
 
    /* More than the 1.10 minimum to appease parser tests taken from
     * apps that (hopefully) already checked the number of coords.
     */
    ctx->Const.MaxTextureCoordUnits = 4;
 
-   ctx->Const.VertexProgram.MaxAttribs = 16;
-   ctx->Const.VertexProgram.MaxUniformComponents = 512;
-   ctx->Const.MaxVarying = 8;
-   ctx->Const.MaxVertexTextureImageUnits = 0;
-   ctx->Const.MaxCombinedTextureImageUnits = 2;
-   ctx->Const.MaxTextureImageUnits = 2;
-   ctx->Const.FragmentProgram.MaxUniformComponents = 64;
-
-   ctx->Const.MaxDrawBuffers = 2;
-
    ctx->Driver.NewShader = _mesa_new_shader;
 }
 
diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp
new file mode 100644
index 00000000000..696ea757e96
--- /dev/null
+++ b/src/glsl/standalone_scaffolding.cpp
@@ -0,0 +1,91 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* This file declares stripped-down versions of functions that
+ * normally exist outside of the glsl folder, so that they can be used
+ * when running the GLSL compiler standalone (for unit testing or
+ * compiling builtins).
+ */
+
+#include "standalone_scaffolding.h"
+
+#include <assert.h>
+#include <string.h>
+#include "ralloc.h"
+
+void
+_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
+                       struct gl_shader *sh)
+{
+   *ptr = sh;
+}
+
+struct gl_shader *
+_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type)
+{
+   struct gl_shader *shader;
+
+   (void) ctx;
+
+   assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER);
+   shader = rzalloc(NULL, struct gl_shader);
+   if (shader) {
+      shader->Type = type;
+      shader->Name = name;
+      shader->RefCount = 1;
+   }
+   return shader;
+}
+
+void initialize_context_to_defaults(struct gl_context *ctx, gl_api api)
+{
+   memset(ctx, 0, sizeof(*ctx));
+
+   ctx->API = api;
+
+   ctx->Extensions.ARB_ES2_compatibility = true;
+   ctx->Extensions.ARB_draw_buffers = true;
+   ctx->Extensions.ARB_draw_instanced = true;
+   ctx->Extensions.ARB_fragment_coord_conventions = true;
+   ctx->Extensions.EXT_texture_array = true;
+   ctx->Extensions.NV_texture_rectangle = true;
+   ctx->Extensions.EXT_texture3D = true;
+
+   ctx->Const.GLSLVersion = 120;
+
+   /* 1.20 minimums. */
+   ctx->Const.MaxLights = 8;
+   ctx->Const.MaxClipPlanes = 6;
+   ctx->Const.MaxTextureUnits = 2;
+   ctx->Const.MaxTextureCoordUnits = 2;
+   ctx->Const.VertexProgram.MaxAttribs = 16;
+
+   ctx->Const.VertexProgram.MaxUniformComponents = 512;
+   ctx->Const.MaxVarying = 8; /* == gl_MaxVaryingFloats / 4 */
+   ctx->Const.MaxVertexTextureImageUnits = 0;
+   ctx->Const.MaxCombinedTextureImageUnits = 2;
+   ctx->Const.MaxTextureImageUnits = 2;
+   ctx->Const.FragmentProgram.MaxUniformComponents = 64;
+
+   ctx->Const.MaxDrawBuffers = 1;
+}
diff --git a/src/glsl/standalone_scaffolding.h b/src/glsl/standalone_scaffolding.h
new file mode 100644
index 00000000000..87733200670
--- /dev/null
+++ b/src/glsl/standalone_scaffolding.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* This file declares stripped-down versions of functions that
+ * normally exist outside of the glsl folder, so that they can be used
+ * when running the GLSL compiler standalone (for unit testing or
+ * compiling builtins).
+ */
+
+#pragma once
+#ifndef STANDALONE_SCAFFOLDING_H
+#define STANDALONE_SCAFFOLDING_H
+
+#include "main/mtypes.h"
+
+extern "C" void
+_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
+                       struct gl_shader *sh);
+
+extern "C" struct gl_shader *
+_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type);
+
+/**
+ * Initialize the given gl_context structure to a reasonable set of
+ * defaults representing the minimum capabilities required by the
+ * OpenGL spec.
+ *
+ * This is used when compiling builtin functions and in testing, when
+ * we don't have a connection to an actual driver.
+ */
+void initialize_context_to_defaults(struct gl_context *ctx, gl_api api);
+
+
+#endif /* STANDALONE_SCAFFOLDING_H */

From f1f76e157ed1ba554fc3a0172113997344049e07 Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Wed, 29 Jun 2011 12:30:04 -0700
Subject: [PATCH 045/600] glsl: Create a standalone executable for testing
 optimization passes.

This patch adds a new build artifact, glsl_test, which can be used for
testing optimization passes in isolation.

I'm hoping that we will be able to add other useful standalone tests
to this executable in the future.  Accordingly, it is built in a
modular fashion: the main() function uses its first argument to
determine which test function to invoke, removes that argument from
argv[], and then calls that function to interpret the rest of the
command line arguments and perform the test.  Currently the only test
function is "optpass", which tests optimization passes.
---
 src/glsl/.gitignore       |   1 +
 src/glsl/Makefile         |  24 +++-
 src/glsl/test.cpp         |  78 +++++++++++
 src/glsl/test_optpass.cpp | 273 ++++++++++++++++++++++++++++++++++++++
 src/glsl/test_optpass.h   |  30 +++++
 5 files changed, 403 insertions(+), 3 deletions(-)
 create mode 100644 src/glsl/test.cpp
 create mode 100644 src/glsl/test_optpass.cpp
 create mode 100644 src/glsl/test_optpass.h

diff --git a/src/glsl/.gitignore b/src/glsl/.gitignore
index dfbd572d894..d26839a3e3e 100644
--- a/src/glsl/.gitignore
+++ b/src/glsl/.gitignore
@@ -5,3 +5,4 @@ glsl_parser.h
 glsl_parser.output
 builtin_function.cpp
 builtin_compiler
+glsl_test
diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index edfb35eb0b8..005b51d724b 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -89,7 +89,7 @@ CXX_SOURCES = \
 LIBS = \
 	$(TOP)/src/glsl/libglsl.a
 
-APPS = glsl_compiler glcpp/glcpp
+APPS = glsl_compiler glsl_test glcpp/glcpp
 
 GLSL2_C_SOURCES = \
 	../mesa/program/hash_table.c \
@@ -102,6 +102,19 @@ GLSL2_OBJECTS = \
 	$(GLSL2_C_SOURCES:.c=.o) \
 	$(GLSL2_CXX_SOURCES:.cpp=.o)
 
+TEST_C_SOURCES = \
+	../mesa/program/hash_table.c \
+	../mesa/program/symbol_table.c
+
+TEST_CXX_SOURCES = \
+	standalone_scaffolding.cpp \
+	test.cpp \
+	test_optpass.cpp
+
+TEST_OBJECTS = \
+	$(TEST_C_SOURCES:.c=.o) \
+	$(TEST_CXX_SOURCES:.cpp=.o)
+
 ### Basic defines ###
 
 DEFINES += \
@@ -130,7 +143,9 @@ ALL_SOURCES = \
 	$(C_SOURCES) \
 	$(CXX_SOURCES) \
 	$(GLSL2_CXX_SOURCES) \
-	$(GLSL2_C_SOURCES)
+	$(GLSL2_C_SOURCES) \
+	$(TEST_CXX_SOURCES) \
+	$(TEST_C_SOURCES)
 
 ##### TARGETS #####
 
@@ -152,7 +167,7 @@ depend: $(ALL_SOURCES) Makefile
 
 # Remove .o and backup files
 clean: clean-dricore
-	rm -f $(GLCPP_OBJECTS) $(GLSL2_OBJECTS) $(OBJECTS) lib$(LIBNAME).a depend depend.bak builtin_function.cpp builtin_function.o builtin_stubs.o builtin_compiler
+	rm -f $(GLCPP_OBJECTS) $(GLSL2_OBJECTS) $(TEST_OBJECTS) $(OBJECTS) lib$(LIBNAME).a depend depend.bak builtin_function.cpp builtin_function.o builtin_stubs.o builtin_compiler
 	-rm -f $(APPS)
 
 clean-dricore:
@@ -175,6 +190,9 @@ install-dricore: default
 glsl_compiler: $(GLSL2_OBJECTS) libglsl.a builtin_stubs.o
 	$(APP_CXX) $(INCLUDES) $(CFLAGS) $(LDFLAGS) $(GLSL2_OBJECTS) builtin_stubs.o $(LIBS) -o $@
 
+glsl_test: $(TEST_OBJECTS) libglsl.a builtin_stubs.o
+	$(APP_CXX) $(INCLUDES) $(CFLAGS) $(LDFLAGS) $(TEST_OBJECTS) builtin_stubs.o $(LIBS) -o $@
+
 glcpp: glcpp/glcpp
 glcpp/glcpp: $(GLCPP_OBJECTS)
 	$(APP_CC) $(INCLUDES) $(CFLAGS) $(LDFLAGS) $(GLCPP_OBJECTS) -o $@
diff --git a/src/glsl/test.cpp b/src/glsl/test.cpp
new file mode 100644
index 00000000000..b1ff92ed1d4
--- /dev/null
+++ b/src/glsl/test.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file test.cpp
+ *
+ * Standalone tests for the GLSL compiler.
+ *
+ * This file provides a standalone executable which can be used to
+ * test components of the GLSL.
+ *
+ * Each test is a function with the same signature as main().  The
+ * main function interprets its first argument as the name of the test
+ * to run, strips out that argument, and then calls the test function.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "test_optpass.h"
+
+/**
+ * Print proper usage and exit with failure.
+ */
+static void
+usage_fail(const char *name)
+{
+   printf("*** usage: %s <command> <options>\n", name);
+   printf("\n");
+   printf("Possible commands are:\n");
+   printf("  optpass: test an optimization pass in isolation\n");
+   exit(EXIT_FAILURE);
+}
+
+static const char *extract_command_from_argv(int *argc, char **argv)
+{
+   if (*argc < 2) {
+      usage_fail(argv[0]);
+   }
+   const char *command = argv[1];
+   --*argc;
+   memmove(&argv[1], &argv[2], (*argc) * sizeof(argv[1]));
+   return command;
+}
+
+int main(int argc, char **argv)
+{
+   const char *command = extract_command_from_argv(&argc, argv);
+   if (strcmp(command, "optpass") == 0) {
+      return test_optpass(argc, argv);
+   } else {
+      usage_fail(argv[0]);
+   }
+
+   /* Execution should never reach here. */
+   return EXIT_FAILURE;
+}
diff --git a/src/glsl/test_optpass.cpp b/src/glsl/test_optpass.cpp
new file mode 100644
index 00000000000..89b7f8338dc
--- /dev/null
+++ b/src/glsl/test_optpass.cpp
@@ -0,0 +1,273 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file test_optpass.cpp
+ *
+ * Standalone test for optimization passes.
+ *
+ * This file provides the "optpass" command for the standalone
+ * glsl_test app.  It accepts either GLSL or high-level IR as input,
+ * and performs the optimiation passes specified on the command line.
+ * It outputs the IR, both before and after optimiations.
+ */
+
+#include <string>
+#include <iostream>
+#include <sstream>
+#include <getopt.h>
+
+#include "ast.h"
+#include "ir_optimization.h"
+#include "ir_print_visitor.h"
+#include "program.h"
+#include "ir_reader.h"
+#include "standalone_scaffolding.h"
+
+using namespace std;
+
+static string read_stdin_to_eof()
+{
+   stringbuf sb;
+   cin.get(sb, '\0');
+   return sb.str();
+}
+
+static GLboolean
+do_optimization(struct exec_list *ir, const char *optimization)
+{
+   int int_0;
+   int int_1;
+   int int_2;
+   int int_3;
+   int int_4;
+
+   if (sscanf(optimization, "do_common_optimization ( %d , %d ) ",
+              &int_0, &int_1) == 2) {
+      return do_common_optimization(ir, int_0 != 0, int_1);
+   } else if (strcmp(optimization, "do_algebraic") == 0) {
+      return do_algebraic(ir);
+   } else if (strcmp(optimization, "do_constant_folding") == 0) {
+      return do_constant_folding(ir);
+   } else if (strcmp(optimization, "do_constant_variable") == 0) {
+      return do_constant_variable(ir);
+   } else if (strcmp(optimization, "do_constant_variable_unlinked") == 0) {
+      return do_constant_variable_unlinked(ir);
+   } else if (strcmp(optimization, "do_copy_propagation") == 0) {
+      return do_copy_propagation(ir);
+   } else if (strcmp(optimization, "do_copy_propagation_elements") == 0) {
+      return do_copy_propagation_elements(ir);
+   } else if (strcmp(optimization, "do_constant_propagation") == 0) {
+      return do_constant_propagation(ir);
+   } else if (strcmp(optimization, "do_dead_code") == 0) {
+      return do_dead_code(ir);
+   } else if (strcmp(optimization, "do_dead_code_local") == 0) {
+      return do_dead_code_local(ir);
+   } else if (strcmp(optimization, "do_dead_code_unlinked") == 0) {
+      return do_dead_code_unlinked(ir);
+   } else if (strcmp(optimization, "do_dead_functions") == 0) {
+      return do_dead_functions(ir);
+   } else if (strcmp(optimization, "do_function_inlining") == 0) {
+      return do_function_inlining(ir);
+   } else if (sscanf(optimization,
+                     "do_lower_jumps ( %d , %d , %d , %d , %d ) ",
+                     &int_0, &int_1, &int_2, &int_3, &int_4) == 5) {
+      return do_lower_jumps(ir, int_0 != 0, int_1 != 0, int_2 != 0,
+                            int_3 != 0, int_4 != 0);
+   } else if (strcmp(optimization, "do_lower_texture_projection") == 0) {
+      return do_lower_texture_projection(ir);
+   } else if (strcmp(optimization, "do_if_simplification") == 0) {
+      return do_if_simplification(ir);
+   } else if (strcmp(optimization, "do_discard_simplification") == 0) {
+      return do_discard_simplification(ir);
+   } else if (sscanf(optimization, "lower_if_to_cond_assign ( %d ) ",
+                     &int_0) == 1) {
+      return lower_if_to_cond_assign(ir, int_0);
+   } else if (strcmp(optimization, "do_mat_op_to_vec") == 0) {
+      return do_mat_op_to_vec(ir);
+   } else if (strcmp(optimization, "do_noop_swizzle") == 0) {
+      return do_noop_swizzle(ir);
+   } else if (strcmp(optimization, "do_structure_splitting") == 0) {
+      return do_structure_splitting(ir);
+   } else if (strcmp(optimization, "do_swizzle_swizzle") == 0) {
+      return do_swizzle_swizzle(ir);
+   } else if (strcmp(optimization, "do_tree_grafting") == 0) {
+      return do_tree_grafting(ir);
+   } else if (strcmp(optimization, "do_vec_index_to_cond_assign") == 0) {
+      return do_vec_index_to_cond_assign(ir);
+   } else if (strcmp(optimization, "do_vec_index_to_swizzle") == 0) {
+      return do_vec_index_to_swizzle(ir);
+   } else if (strcmp(optimization, "lower_discard") == 0) {
+      return lower_discard(ir);
+   } else if (sscanf(optimization, "lower_instructions ( %d ) ",
+                     &int_0) == 1) {
+      return lower_instructions(ir, int_0);
+   } else if (strcmp(optimization, "lower_noise") == 0) {
+      return lower_noise(ir);
+   } else if (sscanf(optimization, "lower_variable_index_to_cond_assign "
+                     "( %d , %d , %d , %d ) ", &int_0, &int_1, &int_2,
+                     &int_3) == 4) {
+      return lower_variable_index_to_cond_assign(ir, int_0 != 0, int_1 != 0,
+                                                 int_2 != 0, int_3 != 0);
+   } else if (sscanf(optimization, "lower_quadop_vector ( %d ) ",
+                     &int_0) == 1) {
+      return lower_quadop_vector(ir, int_0 != 0);
+   } else if (strcmp(optimization, "optimize_redundant_jumps") == 0) {
+      return optimize_redundant_jumps(ir);
+   } else {
+      printf("Unrecognized optimization %s\n", optimization);
+      exit(EXIT_FAILURE);
+      return false;
+   }
+}
+
+static GLboolean
+do_optimization_passes(struct exec_list *ir, char **optimizations,
+                       int num_optimizations, bool quiet)
+{
+   GLboolean overall_progress = false;
+
+   for (int i = 0; i < num_optimizations; ++i) {
+      const char *optimization = optimizations[i];
+      if (!quiet) {
+         printf("*** Running optimization %s...", optimization);
+      }
+      GLboolean progress = do_optimization(ir, optimization);
+      if (!quiet) {
+         printf("%s\n", progress ? "progress" : "no progress");
+      }
+      validate_ir_tree(ir);
+
+      overall_progress = overall_progress || progress;
+   }
+
+   return overall_progress;
+}
+
+int test_optpass(int argc, char **argv)
+{
+   int input_format_ir = 0; /* 0=glsl, 1=ir */
+   int loop = 0;
+   int shader_type = GL_VERTEX_SHADER;
+   int quiet = 0;
+
+   const struct option optpass_opts[] = {
+      { "input-ir", no_argument, &input_format_ir, 1 },
+      { "input-glsl", no_argument, &input_format_ir, 0 },
+      { "loop", no_argument, &loop, 1 },
+      { "vertex-shader", no_argument, &shader_type, GL_VERTEX_SHADER },
+      { "fragment-shader", no_argument, &shader_type, GL_FRAGMENT_SHADER },
+      { "quiet", no_argument, &quiet, 1 },
+      { NULL, 0, NULL, 0 }
+   };
+
+   int idx = 0;
+   int c;
+   while ((c = getopt_long(argc, argv, "", optpass_opts, &idx)) != -1) {
+      if (c != 0) {
+         printf("*** usage: %s optpass <optimizations> <options>\n", argv[0]);
+         printf("\n");
+         printf("Possible options are:\n");
+         printf("  --input-ir: input format is IR\n");
+         printf("  --input-glsl: input format is GLSL (the default)\n");
+         printf("  --loop: run optimizations repeatedly until no progress\n");
+         printf("  --vertex-shader: test with a vertex shader (the default)\n");
+         printf("  --fragment-shader: test with a fragment shader\n");
+         exit(EXIT_FAILURE);
+      }
+   }
+
+   struct gl_context local_ctx;
+   struct gl_context *ctx = &local_ctx;
+   initialize_context_to_defaults(ctx, API_OPENGL);
+
+   ctx->Driver.NewShader = _mesa_new_shader;
+
+   struct gl_shader *shader = rzalloc(NULL, struct gl_shader);
+   shader->Type = shader_type;
+
+   string input = read_stdin_to_eof();
+
+   struct _mesa_glsl_parse_state *state
+      = new(shader) _mesa_glsl_parse_state(ctx, shader->Type, shader);
+
+   if (input_format_ir) {
+      shader->ir = new(shader) exec_list;
+      _mesa_glsl_initialize_types(state);
+      _mesa_glsl_read_ir(state, shader->ir, input.c_str(), true);
+   } else {
+      shader->Source = input.c_str();
+      const char *source = shader->Source;
+      state->error = preprocess(state, &source, &state->info_log,
+                                state->extensions, ctx->API) != 0;
+
+      if (!state->error) {
+         _mesa_glsl_lexer_ctor(state, source);
+         _mesa_glsl_parse(state);
+         _mesa_glsl_lexer_dtor(state);
+      }
+
+      shader->ir = new(shader) exec_list;
+      if (!state->error && !state->translation_unit.is_empty())
+         _mesa_ast_to_hir(shader->ir, state);
+   }
+
+   /* Print out the initial IR */
+   if (!state->error && !quiet) {
+      printf("*** pre-optimization IR:\n");
+      _mesa_print_ir(shader->ir, state);
+      printf("\n--\n");
+   }
+
+   /* Optimization passes */
+   if (!state->error) {
+      GLboolean progress;
+      do {
+         progress = do_optimization_passes(shader->ir, &argv[optind],
+                                           argc - optind, quiet != 0);
+      } while (loop && progress);
+   }
+
+   /* Print out the resulting IR */
+   if (!state->error) {
+      if (!quiet) {
+         printf("*** resulting IR:\n");
+      }
+      _mesa_print_ir(shader->ir, state);
+      if (!quiet) {
+         printf("\n--\n");
+      }
+   }
+
+   if (state->error) {
+      printf("*** error(s) occurred:\n");
+      printf("%s\n", state->info_log);
+      printf("--\n");
+   }
+
+   ralloc_free(state);
+   ralloc_free(shader);
+
+   return state->error;
+}
+
diff --git a/src/glsl/test_optpass.h b/src/glsl/test_optpass.h
new file mode 100644
index 00000000000..923ccf3dece
--- /dev/null
+++ b/src/glsl/test_optpass.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+#ifndef TEST_OPTPASS_H
+#define TEST_OPTPASS_H
+
+int test_optpass(int argc, char **argv);
+
+#endif /* TEST_OPTPASS_H */

From 659cdedb532e675da5676d40ee39278aadd8f0a1 Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Tue, 5 Jul 2011 11:52:06 -0700
Subject: [PATCH 046/600] glsl: Add unit tests for lower_jumps.cpp

These tests invoke do_lower_jumps() in isolation (using the glsl_test
executable) and verify that it transforms the IR in the expected way.

The unit tests may be run from the top level directory using "make
check".

For reference, I've also checked in the Python script
create_test_cases.py, which was used to generate these tests.  It is
not necessary to run this script in order to run the tests.

Acked-by: Chad Versace <chad@chad-versace.us>
---
 Makefile                                      |   6 +-
 src/glsl/tests/compare_ir                     |  59 ++
 src/glsl/tests/lower_jumps/.gitignore         |   1 +
 .../tests/lower_jumps/create_test_cases.py    | 643 ++++++++++++++++++
 .../tests/lower_jumps/lower_breaks_1.opt_test |  13 +
 .../lower_breaks_1.opt_test.expected          |   5 +
 .../tests/lower_jumps/lower_breaks_2.opt_test |  15 +
 .../lower_breaks_2.opt_test.expected          |   7 +
 .../tests/lower_jumps/lower_breaks_3.opt_test |  17 +
 .../lower_breaks_3.opt_test.expected          |   8 +
 .../tests/lower_jumps/lower_breaks_4.opt_test |  15 +
 .../lower_breaks_4.opt_test.expected          |   7 +
 .../tests/lower_jumps/lower_breaks_5.opt_test |  16 +
 .../lower_breaks_5.opt_test.expected          |   7 +
 .../tests/lower_jumps/lower_breaks_6.opt_test |  29 +
 .../lower_breaks_6.opt_test.expected          |  29 +
 .../lower_guarded_conditional_break.opt_test  |  21 +
 ...uarded_conditional_break.opt_test.expected |  20 +
 .../lower_pulled_out_jump.opt_test            |  28 +
 .../lower_pulled_out_jump.opt_test.expected   |  25 +
 .../lower_jumps/lower_returns_1.opt_test      |  12 +
 .../lower_returns_1.opt_test.expected         |   4 +
 .../lower_jumps/lower_returns_2.opt_test      |  13 +
 .../lower_returns_2.opt_test.expected         |   5 +
 .../lower_jumps/lower_returns_3.opt_test      |  20 +
 .../lower_returns_3.opt_test.expected         |  21 +
 .../lower_jumps/lower_returns_4.opt_test      |  14 +
 .../lower_returns_4.opt_test.expected         |  16 +
 .../lower_returns_main_false.opt_test         |  17 +
 ...lower_returns_main_false.opt_test.expected |   8 +
 .../lower_returns_main_true.opt_test          |  17 +
 .../lower_returns_main_true.opt_test.expected |  13 +
 .../lower_returns_sub_false.opt_test          |  16 +
 .../lower_returns_sub_false.opt_test.expected |   8 +
 .../lower_returns_sub_true.opt_test           |  16 +
 .../lower_returns_sub_true.opt_test.expected  |  13 +
 .../lower_unified_returns.opt_test            |  26 +
 .../lower_unified_returns.opt_test.expected   |  21 +
 .../remove_continue_at_end_of_loop.opt_test   |  13 +
 ..._continue_at_end_of_loop.opt_test.expected |   5 +
 ...void_at_end_of_loop_lower_nothing.opt_test |  16 +
 ...nd_of_loop_lower_nothing.opt_test.expected |   8 +
 ..._void_at_end_of_loop_lower_return.opt_test |  16 +
 ...end_of_loop_lower_return.opt_test.expected |  19 +
 ...nd_of_loop_lower_return_and_break.opt_test |  16 +
 ...p_lower_return_and_break.opt_test.expected |  19 +
 ...void_at_end_of_loop_lower_nothing.opt_test |  14 +
 ...nd_of_loop_lower_nothing.opt_test.expected |   6 +
 ..._void_at_end_of_loop_lower_return.opt_test |  14 +
 ...end_of_loop_lower_return.opt_test.expected |  11 +
 ...nd_of_loop_lower_return_and_break.opt_test |  14 +
 ...p_lower_return_and_break.opt_test.expected |  11 +
 src/glsl/tests/optimization-test              |  28 +
 src/glsl/tests/sexps.py                       | 103 +++
 54 files changed, 1543 insertions(+), 1 deletion(-)
 create mode 100755 src/glsl/tests/compare_ir
 create mode 100644 src/glsl/tests/lower_jumps/.gitignore
 create mode 100644 src/glsl/tests/lower_jumps/create_test_cases.py
 create mode 100755 src/glsl/tests/lower_jumps/lower_breaks_1.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/lower_breaks_1.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/lower_breaks_2.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/lower_breaks_2.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/lower_breaks_3.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/lower_breaks_3.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/lower_breaks_4.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/lower_breaks_4.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/lower_breaks_5.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/lower_breaks_5.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/lower_breaks_6.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/lower_breaks_6.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/lower_returns_1.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/lower_returns_1.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/lower_returns_2.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/lower_returns_2.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/lower_returns_3.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/lower_returns_3.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/lower_returns_4.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/lower_returns_4.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/lower_unified_returns.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/lower_unified_returns.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_nothing.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_nothing.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return_and_break.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return_and_break.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_nothing.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_nothing.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return.opt_test.expected
 create mode 100755 src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return_and_break.opt_test
 create mode 100644 src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return_and_break.opt_test.expected
 create mode 100755 src/glsl/tests/optimization-test
 create mode 100644 src/glsl/tests/sexps.py

diff --git a/Makefile b/Makefile
index 817f3d3706a..916c498416d 100644
--- a/Makefile
+++ b/Makefile
@@ -21,6 +21,10 @@ all: default
 doxygen:
 	cd doxygen && $(MAKE)
 
+check:
+	cd src/glsl/tests/ && ./optimization-test
+	@echo "All tests passed."
+
 clean:
 	-@touch $(TOP)/configs/current
 	-@for dir in $(SUBDIRS) ; do \
@@ -51,7 +55,7 @@ install:
 	done
 
 
-.PHONY: default doxygen clean realclean distclean install
+.PHONY: default doxygen clean realclean distclean install check
 
 # If there's no current configuration file
 $(TOP)/configs/current:
diff --git a/src/glsl/tests/compare_ir b/src/glsl/tests/compare_ir
new file mode 100755
index 00000000000..a40fc810cf3
--- /dev/null
+++ b/src/glsl/tests/compare_ir
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+# coding=utf-8
+#
+# Copyright © 2011 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# Compare two files containing IR code.  Ignore formatting differences
+# and declaration order.
+
+import os
+import os.path
+import subprocess
+import sys
+import tempfile
+
+from sexps import *
+
+if len(sys.argv) != 3:
+    print 'Usage: compare_ir <file1> <file2>'
+    exit(1)
+
+with open(sys.argv[1]) as f:
+    ir1 = sort_decls(parse_sexp(f.read()))
+with open(sys.argv[2]) as f:
+    ir2 = sort_decls(parse_sexp(f.read()))
+
+if ir1 == ir2:
+    exit(0)
+else:
+    file1, path1 = tempfile.mkstemp(os.path.basename(sys.argv[1]))
+    file2, path2 = tempfile.mkstemp(os.path.basename(sys.argv[2]))
+    try:
+        os.write(file1, '{0}\n'.format(sexp_to_string(ir1)))
+        os.close(file1)
+        os.write(file2, '{0}\n'.format(sexp_to_string(ir2)))
+        os.close(file2)
+        subprocess.call(['diff', '-u', path1, path2])
+    finally:
+        os.remove(path1)
+        os.remove(path2)
+    exit(1)
diff --git a/src/glsl/tests/lower_jumps/.gitignore b/src/glsl/tests/lower_jumps/.gitignore
new file mode 100644
index 00000000000..f47cb2045f1
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/.gitignore
@@ -0,0 +1 @@
+*.out
diff --git a/src/glsl/tests/lower_jumps/create_test_cases.py b/src/glsl/tests/lower_jumps/create_test_cases.py
new file mode 100644
index 00000000000..fbc6f0a84ea
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/create_test_cases.py
@@ -0,0 +1,643 @@
+# coding=utf-8
+#
+# Copyright © 2011 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+import os
+import os.path
+import re
+import subprocess
+import sys
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) # For access to sexps.py, which is in parent dir
+from sexps import *
+
+def make_test_case(f_name, ret_type, body):
+    """Create a simple optimization test case consisting of a single
+    function with the given name, return type, and body.
+
+    Global declarations are automatically created for any undeclared
+    variables that are referenced by the function.  All undeclared
+    variables are assumed to be floats.
+    """
+    check_sexp(body)
+    declarations = {}
+    def make_declarations(sexp, already_declared = ()):
+        if isinstance(sexp, list):
+            if len(sexp) == 2 and sexp[0] == 'var_ref':
+                if sexp[1] not in already_declared:
+                    declarations[sexp[1]] = [
+                        'declare', ['in'], 'float', sexp[1]]
+            elif len(sexp) == 4 and sexp[0] == 'assign':
+                assert sexp[2][0] == 'var_ref'
+                if sexp[2][1] not in already_declared:
+                    declarations[sexp[2][1]] = [
+                        'declare', ['out'], 'float', sexp[2][1]]
+                make_declarations(sexp[3], already_declared)
+            else:
+                already_declared = set(already_declared)
+                for s in sexp:
+                    if isinstance(s, list) and len(s) >= 4 and \
+                            s[0] == 'declare':
+                        already_declared.add(s[3])
+                    else:
+                        make_declarations(s, already_declared)
+    make_declarations(body)
+    return declarations.values() + \
+        [['function', f_name, ['signature', ret_type, ['parameters'], body]]]
+
+
+# The following functions can be used to build expressions.
+
+def const_float(value):
+    """Create an expression representing the given floating point value."""
+    return ['constant', 'float', ['{0:.6f}'.format(value)]]
+
+def const_bool(value):
+    """Create an expression representing the given boolean value.
+
+    If value is not a boolean, it is converted to a boolean.  So, for
+    instance, const_bool(1) is equivalent to const_bool(True).
+    """
+    return ['constant', 'bool', ['{0}'.format(1 if value else 0)]]
+
+def gt_zero(var_name):
+    """Create Construct the expression var_name > 0"""
+    return ['expression', 'bool', '>', ['var_ref', var_name], const_float(0)]
+
+
+# The following functions can be used to build complex control flow
+# statements.  All of these functions return statement lists (even
+# those which only create a single statement), so that statements can
+# be sequenced together using the '+' operator.
+
+def return_(value = None):
+    """Create a return statement."""
+    if value is not None:
+        return [['return', value]]
+    else:
+        return [['return']]
+
+def break_():
+    """Create a break statement."""
+    return ['break']
+
+def continue_():
+    """Create a continue statement."""
+    return ['continue']
+
+def simple_if(var_name, then_statements, else_statements = None):
+    """Create a statement of the form
+
+    if (var_name > 0.0) {
+       <then_statements>
+    } else {
+       <else_statements>
+    }
+
+    else_statements may be omitted.
+    """
+    if else_statements is None:
+        else_statements = []
+    check_sexp(then_statements)
+    check_sexp(else_statements)
+    return [['if', gt_zero(var_name), then_statements, else_statements]]
+
+def loop(statements):
+    """Create a loop containing the given statements as its loop
+    body.
+    """
+    check_sexp(statements)
+    return [['loop', [], [], [], [], statements]]
+
+def declare_temp(var_type, var_name):
+    """Create a declaration of the form
+
+    (declare (temporary) <var_type> <var_name)
+    """
+    return [['declare', ['temporary'], var_type, var_name]]
+
+def assign_x(var_name, value):
+    """Create a statement that assigns <value> to the variable
+    <var_name>.  The assignment uses the mask (x).
+    """
+    check_sexp(value)
+    return [['assign', ['x'], ['var_ref', var_name], value]]
+
+def complex_if(var_prefix, statements):
+    """Create a statement of the form
+
+    if (<var_prefix>a > 0.0) {
+       if (<var_prefix>b > 0.0) {
+          <statements>
+       }
+    }
+
+    This is useful in testing jump lowering, because if <statements>
+    ends in a jump, lower_jumps.cpp won't try to combine this
+    construct with the code that follows it, as it might do for a
+    simple if.
+
+    All variables used in the if statement are prefixed with
+    var_prefix.  This can be used to ensure uniqueness.
+    """
+    check_sexp(statements)
+    return simple_if(var_prefix + 'a', simple_if(var_prefix + 'b', statements))
+
+def declare_execute_flag():
+    """Create the statements that lower_jumps.cpp uses to declare and
+    initialize the temporary boolean execute_flag.
+    """
+    return declare_temp('bool', 'execute_flag') + \
+        assign_x('execute_flag', const_bool(True))
+
+def declare_return_flag():
+    """Create the statements that lower_jumps.cpp uses to declare and
+    initialize the temporary boolean return_flag.
+    """
+    return declare_temp('bool', 'return_flag') + \
+        assign_x('return_flag', const_bool(False))
+
+def declare_return_value():
+    """Create the statements that lower_jumps.cpp uses to declare and
+    initialize the temporary variable return_value.  Assume that
+    return_value is a float.
+    """
+    return declare_temp('float', 'return_value')
+
+def declare_break_flag():
+    """Create the statements that lower_jumps.cpp uses to declare and
+    initialize the temporary boolean break_flag.
+    """
+    return declare_temp('bool', 'break_flag') + \
+        assign_x('break_flag', const_bool(False))
+
+def lowered_return_simple(value = None):
+    """Create the statements that lower_jumps.cpp lowers a return
+    statement to, in situations where it does not need to clear the
+    execute flag.
+    """
+    if value:
+        result = assign_x('return_value', value)
+    else:
+        result = []
+    return result + assign_x('return_flag', const_bool(True))
+
+def lowered_return(value = None):
+    """Create the statements that lower_jumps.cpp lowers a return
+    statement to, in situations where it needs to clear the execute
+    flag.
+    """
+    return lowered_return_simple(value) + \
+        assign_x('execute_flag', const_bool(False))
+
+def lowered_continue():
+    """Create the statement that lower_jumps.cpp lowers a continue
+    statement to.
+    """
+    return assign_x('execute_flag', const_bool(False))
+
+def lowered_break_simple():
+    """Create the statement that lower_jumps.cpp lowers a break
+    statement to, in situations where it does not need to clear the
+    execute flag.
+    """
+    return assign_x('break_flag', const_bool(True))
+
+def lowered_break():
+    """Create the statement that lower_jumps.cpp lowers a break
+    statement to, in situations where it needs to clear the execute
+    flag.
+    """
+    return lowered_break_simple() + assign_x('execute_flag', const_bool(False))
+
+def if_execute_flag(statements):
+    """Wrap statements in an if test so that they will only execute if
+    execute_flag is True.
+    """
+    check_sexp(statements)
+    return [['if', ['var_ref', 'execute_flag'], statements, []]]
+
+def if_not_return_flag(statements):
+    """Wrap statements in an if test so that they will only execute if
+    return_flag is False.
+    """
+    check_sexp(statements)
+    return [['if', ['var_ref', 'return_flag'], [], statements]]
+
+def final_return():
+    """Create the return statement that lower_jumps.cpp places at the
+    end of a function when lowering returns.
+    """
+    return [['return', ['var_ref', 'return_value']]]
+
+def final_break():
+    """Create the conditional break statement that lower_jumps.cpp
+    places at the end of a function when lowering breaks.
+    """
+    return [['if', ['var_ref', 'break_flag'], break_(), []]]
+
+def bash_quote(*args):
+    """Quote the arguments appropriately so that bash will understand
+    each argument as a single word.
+    """
+    def quote_word(word):
+        for c in word:
+            if not (c.isalpha() or c.isdigit() or c in '@%_-+=:,./'):
+                break
+        else:
+            if not word:
+                return "''"
+            return word
+        return "'{0}'".format(word.replace("'", "'\"'\"'"))
+    return ' '.join(quote_word(word) for word in args)
+
+def create_test_case(doc_string, input_sexp, expected_sexp, test_name,
+                     pull_out_jumps=False, lower_sub_return=False,
+                     lower_main_return=False, lower_continue=False,
+                     lower_break=False):
+    """Create a test case that verifies that do_lower_jumps transforms
+    the given code in the expected way.
+    """
+    doc_lines = [line.strip() for line in doc_string.splitlines()]
+    doc_string = ''.join('# {0}\n'.format(line) for line in doc_lines if line != '')
+    check_sexp(input_sexp)
+    check_sexp(expected_sexp)
+    input_str = sexp_to_string(sort_decls(input_sexp))
+    expected_output = sexp_to_string(sort_decls(expected_sexp))
+
+    optimization = (
+        'do_lower_jumps({0:d}, {1:d}, {2:d}, {3:d}, {4:d})'.format(
+            pull_out_jumps, lower_sub_return, lower_main_return,
+            lower_continue, lower_break))
+    args = ['../../glsl_test', 'optpass', '--quiet', '--input-ir', optimization]
+    test_file = '{0}.opt_test'.format(test_name)
+    with open(test_file, 'w') as f:
+        f.write('#!/bin/bash\n#\n# This file was generated by create_test_cases.py.\n#\n')
+        f.write(doc_string)
+        f.write('{0} <<EOF\n'.format(bash_quote(*args)))
+        f.write('{0}\nEOF\n'.format(input_str))
+    os.chmod(test_file, 0774)
+    expected_file = '{0}.opt_test.expected'.format(test_name)
+    with open(expected_file, 'w') as f:
+        f.write('{0}\n'.format(expected_output))
+
+def test_lower_returns_main():
+    doc_string = """Test that do_lower_jumps respects the lower_main_return
+    flag in deciding whether to lower returns in the main
+    function.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            complex_if('', return_())
+            ))
+    expected_sexp = make_test_case('main', 'void', (
+            declare_execute_flag() +
+            declare_return_flag() +
+            complex_if('', lowered_return())
+            ))
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_returns_main_true',
+                     lower_main_return=True)
+    create_test_case(doc_string, input_sexp, input_sexp, 'lower_returns_main_false',
+                     lower_main_return=False)
+
+def test_lower_returns_sub():
+    doc_string = """Test that do_lower_jumps respects the lower_sub_return flag
+    in deciding whether to lower returns in subroutines.
+    """
+    input_sexp = make_test_case('sub', 'void', (
+            complex_if('', return_())
+            ))
+    expected_sexp = make_test_case('sub', 'void', (
+            declare_execute_flag() +
+            declare_return_flag() +
+            complex_if('', lowered_return())
+            ))
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_returns_sub_true',
+                     lower_sub_return=True)
+    create_test_case(doc_string, input_sexp, input_sexp, 'lower_returns_sub_false',
+                     lower_sub_return=False)
+
+def test_lower_returns_1():
+    doc_string = """Test that a void return at the end of a function is
+    eliminated.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            assign_x('a', const_float(1)) +
+            return_()
+            ))
+    expected_sexp = make_test_case('main', 'void', (
+            assign_x('a', const_float(1))
+            ))
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_returns_1',
+                     lower_main_return=True)
+
+def test_lower_returns_2():
+    doc_string = """Test that lowering is not performed on a non-void return at
+    the end of subroutine.
+    """
+    input_sexp = make_test_case('sub', 'float', (
+            assign_x('a', const_float(1)) +
+            return_(const_float(1))
+            ))
+    create_test_case(doc_string, input_sexp, input_sexp, 'lower_returns_2',
+                     lower_sub_return=True)
+
+def test_lower_returns_3():
+    doc_string = """Test lowering of returns when there is one nested inside a
+    complex structure of ifs, and one at the end of a function.
+
+    In this case, the latter return needs to be lowered because it
+    will not be at the end of the function once the final return
+    is inserted.
+    """
+    input_sexp = make_test_case('sub', 'float', (
+            complex_if('', return_(const_float(1))) +
+            return_(const_float(2))
+            ))
+    expected_sexp = make_test_case('sub', 'float', (
+            declare_execute_flag() +
+            declare_return_value() +
+            declare_return_flag() +
+            complex_if('', lowered_return(const_float(1))) +
+            if_execute_flag(lowered_return(const_float(2))) +
+            final_return()
+            ))
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_returns_3',
+                     lower_sub_return=True)
+
+def test_lower_returns_4():
+    doc_string = """Test that returns are properly lowered when they occur in
+    both branches of an if-statement.
+    """
+    input_sexp = make_test_case('sub', 'float', (
+            simple_if('a', return_(const_float(1)),
+                      return_(const_float(2)))
+            ))
+    expected_sexp = make_test_case('sub', 'float', (
+            declare_execute_flag() +
+            declare_return_value() +
+            declare_return_flag() +
+            simple_if('a', lowered_return(const_float(1)),
+                      lowered_return(const_float(2))) +
+            final_return()
+            ))
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_returns_4',
+                     lower_sub_return=True)
+
+def test_lower_unified_returns():
+    doc_string = """If both branches of an if statement end in a return, and
+    pull_out_jumps is True, then those returns should be lifted
+    outside the if and then properly lowered.
+
+    Verify that this lowering occurs during the same pass as the
+    lowering of other returns by checking that extra temporary
+    variables aren't generated.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            complex_if('a', return_()) +
+            simple_if('b', simple_if('c', return_(), return_()))
+            ))
+    expected_sexp = make_test_case('main', 'void', (
+            declare_execute_flag() +
+            declare_return_flag() +
+            complex_if('a', lowered_return()) +
+            if_execute_flag(simple_if('b', (simple_if('c', [], []) +
+                                            lowered_return())))
+            ))
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_unified_returns',
+                     lower_main_return=True, pull_out_jumps=True)
+
+def test_lower_pulled_out_jump():
+    doc_string = """If one branch of an if ends in a jump, and control cannot
+    fall out the bottom of the other branch, and pull_out_jumps is
+    True, then the jump is lifted outside the if.
+
+    Verify that this lowering occurs during the same pass as the
+    lowering of other jumps by checking that extra temporary
+    variables aren't generated.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            complex_if('a', return_()) +
+            loop(simple_if('b', simple_if('c', break_(), continue_()),
+                           return_())) +
+            assign_x('d', const_float(1))
+            ))
+    # Note: optimization produces two other effects: the break
+    # gets lifted out of the if statements, and the code after the
+    # loop gets guarded so that it only executes if the return
+    # flag is clear.
+    expected_sexp = make_test_case('main', 'void', (
+            declare_execute_flag() +
+            declare_return_flag() +
+            complex_if('a', lowered_return()) +
+            if_execute_flag(
+                loop(simple_if('b', simple_if('c', [], continue_()),
+                               lowered_return_simple()) +
+                     break_()) +
+                if_not_return_flag(assign_x('d', const_float(1))))
+            ))
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_pulled_out_jump',
+                     lower_main_return=True, pull_out_jumps=True)
+
+def test_lower_breaks_1():
+    doc_string = """If a loop contains an unconditional break at the bottom of
+    it, it should not be lowered."""
+    input_sexp = make_test_case('main', 'void', (
+            loop(assign_x('a', const_float(1)) +
+                 break_())
+            ))
+    expected_sexp = input_sexp
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_1', lower_break=True)
+
+def test_lower_breaks_2():
+    doc_string = """If a loop contains a conditional break at the bottom of it,
+    it should not be lowered if it is in the then-clause.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            loop(assign_x('a', const_float(1)) +
+                 simple_if('b', break_()))
+            ))
+    expected_sexp = input_sexp
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_2', lower_break=True)
+
+def test_lower_breaks_3():
+    doc_string = """If a loop contains a conditional break at the bottom of it,
+    it should not be lowered if it is in the then-clause, even if
+    there are statements preceding the break.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            loop(assign_x('a', const_float(1)) +
+                 simple_if('b', (assign_x('c', const_float(1)) +
+                                 break_())))
+            ))
+    expected_sexp = input_sexp
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_3', lower_break=True)
+
+def test_lower_breaks_4():
+    doc_string = """If a loop contains a conditional break at the bottom of it,
+    it should not be lowered if it is in the else-clause.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            loop(assign_x('a', const_float(1)) +
+                 simple_if('b', [], break_()))
+            ))
+    expected_sexp = input_sexp
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_4', lower_break=True)
+
+def test_lower_breaks_5():
+    doc_string = """If a loop contains a conditional break at the bottom of it,
+    it should not be lowered if it is in the else-clause, even if
+    there are statements preceding the break.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            loop(assign_x('a', const_float(1)) +
+                 simple_if('b', [], (assign_x('c', const_float(1)) +
+                                     break_())))
+            ))
+    expected_sexp = input_sexp
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_5', lower_break=True)
+
+def test_lower_breaks_6():
+    doc_string = """If a loop contains conditional breaks and continues, and
+    ends in an unconditional break, then the unconditional break
+    needs to be lowered, because it will no longer be at the end
+    of the loop after the final break is added.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            loop(simple_if('a', (complex_if('b', continue_()) +
+                                 complex_if('c', break_()))) +
+                 break_())
+            ))
+    expected_sexp = make_test_case('main', 'void', (
+            declare_break_flag() +
+            loop(declare_execute_flag() +
+                 simple_if(
+                    'a',
+                    (complex_if('b', lowered_continue()) +
+                     if_execute_flag(
+                            complex_if('c', lowered_break())))) +
+                 if_execute_flag(lowered_break_simple()) +
+                 final_break())
+            ))
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_breaks_6',
+                     lower_break=True, lower_continue=True)
+
+def test_lower_guarded_conditional_break():
+    doc_string = """Normally a conditional break at the end of a loop isn't
+    lowered, however if the conditional break gets placed inside
+    an if(execute_flag) because of earlier lowering of continues,
+    then the break needs to be lowered.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            loop(complex_if('a', continue_()) +
+                 simple_if('b', break_()))
+            ))
+    expected_sexp = make_test_case('main', 'void', (
+            declare_break_flag() +
+            loop(declare_execute_flag() +
+                 complex_if('a', lowered_continue()) +
+                 if_execute_flag(simple_if('b', lowered_break())) +
+                 final_break())
+            ))
+    create_test_case(doc_string, input_sexp, expected_sexp, 'lower_guarded_conditional_break',
+                     lower_break=True, lower_continue=True)
+
+def test_remove_continue_at_end_of_loop():
+    doc_string = """Test that a redundant continue-statement at the end of a
+    loop is removed.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            loop(assign_x('a', const_float(1)) +
+                 continue_())
+            ))
+    expected_sexp = make_test_case('main', 'void', (
+            loop(assign_x('a', const_float(1)))
+            ))
+    create_test_case(doc_string, input_sexp, expected_sexp, 'remove_continue_at_end_of_loop')
+
+def test_lower_return_void_at_end_of_loop():
+    doc_string = """Test that a return of void at the end of a loop is properly
+    lowered.
+    """
+    input_sexp = make_test_case('main', 'void', (
+            loop(assign_x('a', const_float(1)) +
+                 return_()) +
+            assign_x('b', const_float(2))
+            ))
+    expected_sexp = make_test_case('main', 'void', (
+            declare_return_flag() +
+            loop(assign_x('a', const_float(1)) +
+                 lowered_return_simple() +
+                 break_()) +
+            if_not_return_flag(assign_x('b', const_float(2)))
+            ))
+    create_test_case(doc_string, input_sexp, input_sexp, 'return_void_at_end_of_loop_lower_nothing')
+    create_test_case(doc_string, input_sexp, expected_sexp, 'return_void_at_end_of_loop_lower_return',
+                     lower_main_return=True)
+    create_test_case(doc_string, input_sexp, expected_sexp, 'return_void_at_end_of_loop_lower_return_and_break',
+                     lower_main_return=True, lower_break=True)
+
+def test_lower_return_non_void_at_end_of_loop():
+    doc_string = """Test that a non-void return at the end of a loop is
+    properly lowered.
+    """
+    input_sexp = make_test_case('sub', 'float', (
+            loop(assign_x('a', const_float(1)) +
+                 return_(const_float(2))) +
+            assign_x('b', const_float(3)) +
+            return_(const_float(4))
+            ))
+    expected_sexp = make_test_case('sub', 'float', (
+            declare_execute_flag() +
+            declare_return_value() +
+            declare_return_flag() +
+            loop(assign_x('a', const_float(1)) +
+                 lowered_return_simple(const_float(2)) +
+                 break_()) +
+            if_not_return_flag(assign_x('b', const_float(3)) +
+                               lowered_return(const_float(4))) +
+            final_return()
+            ))
+    create_test_case(doc_string, input_sexp, input_sexp, 'return_non_void_at_end_of_loop_lower_nothing')
+    create_test_case(doc_string, input_sexp, expected_sexp, 'return_non_void_at_end_of_loop_lower_return',
+                     lower_sub_return=True)
+    create_test_case(doc_string, input_sexp, expected_sexp, 'return_non_void_at_end_of_loop_lower_return_and_break',
+                     lower_sub_return=True, lower_break=True)
+
+if __name__ == '__main__':
+    test_lower_returns_main()
+    test_lower_returns_sub()
+    test_lower_returns_1()
+    test_lower_returns_2()
+    test_lower_returns_3()
+    test_lower_returns_4()
+    test_lower_unified_returns()
+    test_lower_pulled_out_jump()
+    test_lower_breaks_1()
+    test_lower_breaks_2()
+    test_lower_breaks_3()
+    test_lower_breaks_4()
+    test_lower_breaks_5()
+    test_lower_breaks_6()
+    test_lower_guarded_conditional_break()
+    test_remove_continue_at_end_of_loop()
+    test_lower_return_void_at_end_of_loop()
+    test_lower_return_non_void_at_end_of_loop()
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_1.opt_test b/src/glsl/tests/lower_jumps/lower_breaks_1.opt_test
new file mode 100755
index 00000000000..01ad7087a28
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_1.opt_test
@@ -0,0 +1,13 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# If a loop contains an unconditional break at the bottom of
+# it, it should not be lowered.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 1)' <<EOF
+((declare (out) float a)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000))) break))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_1.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_1.opt_test.expected
new file mode 100644
index 00000000000..d4bb6fc0274
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_1.opt_test.expected
@@ -0,0 +1,5 @@
+((declare (out) float a)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000))) break))))))
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_2.opt_test b/src/glsl/tests/lower_jumps/lower_breaks_2.opt_test
new file mode 100755
index 00000000000..0be22f953e1
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_2.opt_test
@@ -0,0 +1,15 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# If a loop contains a conditional break at the bottom of it,
+# it should not be lowered if it is in the then-clause.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 1)' <<EOF
+((declare (in) float b) (declare (out) float a)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (if (expression bool > (var_ref b) (constant float (0.000000))) (break)
+       ())))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_2.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_2.opt_test.expected
new file mode 100644
index 00000000000..a4cb2d6a125
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_2.opt_test.expected
@@ -0,0 +1,7 @@
+((declare (in) float b) (declare (out) float a)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (if (expression bool > (var_ref b) (constant float (0.000000))) (break)
+       ())))))))
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test b/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test
new file mode 100755
index 00000000000..4149360b5d0
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# If a loop contains a conditional break at the bottom of it,
+# it should not be lowered if it is in the then-clause, even if
+# there are statements preceding the break.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 1)' <<EOF
+((declare (in) float b) (declare (out) float a) (declare (out) float c)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((assign (x) (var_ref c) (constant float (1.000000))) break)
+       ())))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test.expected
new file mode 100644
index 00000000000..325f7b49a5d
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_3.opt_test.expected
@@ -0,0 +1,8 @@
+((declare (in) float b) (declare (out) float a) (declare (out) float c)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((assign (x) (var_ref c) (constant float (1.000000))) break)
+       ())))))))
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test b/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test
new file mode 100755
index 00000000000..70458bb4f8e
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test
@@ -0,0 +1,15 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# If a loop contains a conditional break at the bottom of it,
+# it should not be lowered if it is in the else-clause.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 1)' <<EOF
+((declare (in) float b) (declare (out) float a)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (if (expression bool > (var_ref b) (constant float (0.000000))) ()
+       (break))))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test.expected
new file mode 100644
index 00000000000..a7735457cb8
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_4.opt_test.expected
@@ -0,0 +1,7 @@
+((declare (in) float b) (declare (out) float a)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (if (expression bool > (var_ref b) (constant float (0.000000))) ()
+       (break))))))))
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test b/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test
new file mode 100755
index 00000000000..da9eef1105e
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test
@@ -0,0 +1,16 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# If a loop contains a conditional break at the bottom of it,
+# it should not be lowered if it is in the else-clause, even if
+# there are statements preceding the break.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 1)' <<EOF
+((declare (in) float b) (declare (out) float a) (declare (out) float c)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (if (expression bool > (var_ref b) (constant float (0.000000))) ()
+       ((assign (x) (var_ref c) (constant float (1.000000))) break))))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test.expected
new file mode 100644
index 00000000000..0dd4a529383
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_5.opt_test.expected
@@ -0,0 +1,7 @@
+((declare (in) float b) (declare (out) float a) (declare (out) float c)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (if (expression bool > (var_ref b) (constant float (0.000000))) ()
+       ((assign (x) (var_ref c) (constant float (1.000000))) break))))))))
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test b/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test
new file mode 100755
index 00000000000..9440dfec897
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test
@@ -0,0 +1,29 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# If a loop contains conditional breaks and continues, and
+# ends in an unconditional break, then the unconditional break
+# needs to be lowered, because it will no longer be at the end
+# of the loop after the final break is added.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 1, 1)' <<EOF
+((declare (in) float a) (declare (in) float ba) (declare (in) float bb)
+ (declare (in) float ca)
+ (declare (in) float cb)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((if (expression bool > (var_ref a) (constant float (0.000000)))
+       ((if (expression bool > (var_ref ba) (constant float (0.000000)))
+         ((if (expression bool > (var_ref bb) (constant float (0.000000)))
+           (continue)
+           ()))
+         ())
+        (if (expression bool > (var_ref ca) (constant float (0.000000)))
+         ((if (expression bool > (var_ref cb) (constant float (0.000000)))
+           (break)
+           ()))
+         ()))
+       ())
+      break))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test.expected b/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test.expected
new file mode 100644
index 00000000000..8222328e00c
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_breaks_6.opt_test.expected
@@ -0,0 +1,29 @@
+((declare (in) float a) (declare (in) float ba) (declare (in) float bb)
+ (declare (in) float ca)
+ (declare (in) float cb)
+ (function main
+  (signature void (parameters)
+   ((declare (temporary) bool break_flag)
+    (assign (x) (var_ref break_flag) (constant bool (0)))
+    (loop () () () ()
+     ((declare (temporary) bool execute_flag)
+      (assign (x) (var_ref execute_flag) (constant bool (1)))
+      (if (expression bool > (var_ref a) (constant float (0.000000)))
+       ((if (expression bool > (var_ref ba) (constant float (0.000000)))
+         ((if (expression bool > (var_ref bb) (constant float (0.000000)))
+           ((assign (x) (var_ref execute_flag) (constant bool (0))))
+           ()))
+         ())
+        (if (var_ref execute_flag)
+         ((if (expression bool > (var_ref ca) (constant float (0.000000)))
+           ((if (expression bool > (var_ref cb) (constant float (0.000000)))
+             ((assign (x) (var_ref break_flag) (constant bool (1)))
+              (assign (x) (var_ref execute_flag) (constant bool (0))))
+             ()))
+           ()))
+         ()))
+       ())
+      (if (var_ref execute_flag)
+       ((assign (x) (var_ref break_flag) (constant bool (1))))
+       ())
+      (if (var_ref break_flag) (break) ())))))))
diff --git a/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test b/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test
new file mode 100755
index 00000000000..379aa59b5a2
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test
@@ -0,0 +1,21 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Normally a conditional break at the end of a loop isn't
+# lowered, however if the conditional break gets placed inside
+# an if(execute_flag) because of earlier lowering of continues,
+# then the break needs to be lowered.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 1, 1)' <<EOF
+((declare (in) float aa) (declare (in) float ab) (declare (in) float b)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((if (expression bool > (var_ref aa) (constant float (0.000000)))
+       ((if (expression bool > (var_ref ab) (constant float (0.000000)))
+         (continue)
+         ()))
+       ())
+      (if (expression bool > (var_ref b) (constant float (0.000000))) (break)
+       ())))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test.expected b/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test.expected
new file mode 100644
index 00000000000..7c6e73f77f8
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_guarded_conditional_break.opt_test.expected
@@ -0,0 +1,20 @@
+((declare (in) float aa) (declare (in) float ab) (declare (in) float b)
+ (function main
+  (signature void (parameters)
+   ((declare (temporary) bool break_flag)
+    (assign (x) (var_ref break_flag) (constant bool (0)))
+    (loop () () () ()
+     ((declare (temporary) bool execute_flag)
+      (assign (x) (var_ref execute_flag) (constant bool (1)))
+      (if (expression bool > (var_ref aa) (constant float (0.000000)))
+       ((if (expression bool > (var_ref ab) (constant float (0.000000)))
+         ((assign (x) (var_ref execute_flag) (constant bool (0))))
+         ()))
+       ())
+      (if (var_ref execute_flag)
+       ((if (expression bool > (var_ref b) (constant float (0.000000)))
+         ((assign (x) (var_ref break_flag) (constant bool (1)))
+          (assign (x) (var_ref execute_flag) (constant bool (0))))
+         ()))
+       ())
+      (if (var_ref break_flag) (break) ())))))))
diff --git a/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test b/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test
new file mode 100755
index 00000000000..15f3c41d5a2
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# If one branch of an if ends in a jump, and control cannot
+# fall out the bottom of the other branch, and pull_out_jumps is
+# True, then the jump is lifted outside the if.
+# Verify that this lowering occurs during the same pass as the
+# lowering of other jumps by checking that extra temporary
+# variables aren't generated.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(1, 0, 1, 0, 0)' <<EOF
+((declare (in) float aa) (declare (in) float ab) (declare (in) float b)
+ (declare (in) float c)
+ (declare (out) float d)
+ (function main
+  (signature void (parameters)
+   ((if (expression bool > (var_ref aa) (constant float (0.000000)))
+     ((if (expression bool > (var_ref ab) (constant float (0.000000)))
+       ((return))
+       ()))
+     ())
+    (loop () () () ()
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((if (expression bool > (var_ref c) (constant float (0.000000))) (break)
+         (continue)))
+       ((return)))))
+    (assign (x) (var_ref d) (constant float (1.000000)))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test.expected b/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test.expected
new file mode 100644
index 00000000000..bf45c2c93b6
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_pulled_out_jump.opt_test.expected
@@ -0,0 +1,25 @@
+((declare (in) float aa) (declare (in) float ab) (declare (in) float b)
+ (declare (in) float c)
+ (declare (out) float d)
+ (function main
+  (signature void (parameters)
+   ((declare (temporary) bool execute_flag)
+    (assign (x) (var_ref execute_flag) (constant bool (1)))
+    (declare (temporary) bool return_flag)
+    (assign (x) (var_ref return_flag) (constant bool (0)))
+    (if (expression bool > (var_ref aa) (constant float (0.000000)))
+     ((if (expression bool > (var_ref ab) (constant float (0.000000)))
+       ((assign (x) (var_ref return_flag) (constant bool (1)))
+        (assign (x) (var_ref execute_flag) (constant bool (0))))
+       ()))
+     ())
+    (if (var_ref execute_flag)
+     ((loop () () () ()
+       ((if (expression bool > (var_ref b) (constant float (0.000000)))
+         ((if (expression bool > (var_ref c) (constant float (0.000000))) ()
+           (continue)))
+         ((assign (x) (var_ref return_flag) (constant bool (1)))))
+        break))
+      (if (var_ref return_flag) ()
+       ((assign (x) (var_ref d) (constant float (1.000000))))))
+     ())))))
diff --git a/src/glsl/tests/lower_jumps/lower_returns_1.opt_test b/src/glsl/tests/lower_jumps/lower_returns_1.opt_test
new file mode 100755
index 00000000000..a1f895bbf78
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_1.opt_test
@@ -0,0 +1,12 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that a void return at the end of a function is
+# eliminated.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 1, 0, 0)' <<EOF
+((declare (out) float a)
+ (function main
+  (signature void (parameters)
+   ((assign (x) (var_ref a) (constant float (1.000000))) (return)))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_returns_1.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_1.opt_test.expected
new file mode 100644
index 00000000000..7c3919c016e
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_1.opt_test.expected
@@ -0,0 +1,4 @@
+((declare (out) float a)
+ (function main
+  (signature void (parameters)
+   ((assign (x) (var_ref a) (constant float (1.000000)))))))
diff --git a/src/glsl/tests/lower_jumps/lower_returns_2.opt_test b/src/glsl/tests/lower_jumps/lower_returns_2.opt_test
new file mode 100755
index 00000000000..61673d4ef66
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_2.opt_test
@@ -0,0 +1,13 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that lowering is not performed on a non-void return at
+# the end of subroutine.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 1, 0, 0, 0)' <<EOF
+((declare (out) float a)
+ (function sub
+  (signature float (parameters)
+   ((assign (x) (var_ref a) (constant float (1.000000)))
+    (return (constant float (1.000000)))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_returns_2.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_2.opt_test.expected
new file mode 100644
index 00000000000..7777927f5a3
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_2.opt_test.expected
@@ -0,0 +1,5 @@
+((declare (out) float a)
+ (function sub
+  (signature float (parameters)
+   ((assign (x) (var_ref a) (constant float (1.000000)))
+    (return (constant float (1.000000)))))))
diff --git a/src/glsl/tests/lower_jumps/lower_returns_3.opt_test b/src/glsl/tests/lower_jumps/lower_returns_3.opt_test
new file mode 100755
index 00000000000..9881e249270
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_3.opt_test
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test lowering of returns when there is one nested inside a
+# complex structure of ifs, and one at the end of a function.
+# In this case, the latter return needs to be lowered because it
+# will not be at the end of the function once the final return
+# is inserted.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 1, 0, 0, 0)' <<EOF
+((declare (in) float a) (declare (in) float b)
+ (function sub
+  (signature float (parameters)
+   ((if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((return (constant float (1.000000))))
+       ()))
+     ())
+    (return (constant float (2.000000)))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_returns_3.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_3.opt_test.expected
new file mode 100644
index 00000000000..d4835e96b7c
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_3.opt_test.expected
@@ -0,0 +1,21 @@
+((declare (in) float a) (declare (in) float b)
+ (function sub
+  (signature float (parameters)
+   ((declare (temporary) bool execute_flag)
+    (assign (x) (var_ref execute_flag) (constant bool (1)))
+    (declare (temporary) float return_value)
+    (declare (temporary) bool return_flag)
+    (assign (x) (var_ref return_flag) (constant bool (0)))
+    (if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((assign (x) (var_ref return_value) (constant float (1.000000)))
+        (assign (x) (var_ref return_flag) (constant bool (1)))
+        (assign (x) (var_ref execute_flag) (constant bool (0))))
+       ()))
+     ())
+    (if (var_ref execute_flag)
+     ((assign (x) (var_ref return_value) (constant float (2.000000)))
+      (assign (x) (var_ref return_flag) (constant bool (1)))
+      (assign (x) (var_ref execute_flag) (constant bool (0))))
+     ())
+    (return (var_ref return_value))))))
diff --git a/src/glsl/tests/lower_jumps/lower_returns_4.opt_test b/src/glsl/tests/lower_jumps/lower_returns_4.opt_test
new file mode 100755
index 00000000000..9f54c67a180
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_4.opt_test
@@ -0,0 +1,14 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that returns are properly lowered when they occur in
+# both branches of an if-statement.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 1, 0, 0, 0)' <<EOF
+((declare (in) float a)
+ (function sub
+  (signature float (parameters)
+   ((if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((return (constant float (1.000000))))
+     ((return (constant float (2.000000)))))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_returns_4.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_4.opt_test.expected
new file mode 100644
index 00000000000..b551a066f43
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_4.opt_test.expected
@@ -0,0 +1,16 @@
+((declare (in) float a)
+ (function sub
+  (signature float (parameters)
+   ((declare (temporary) bool execute_flag)
+    (assign (x) (var_ref execute_flag) (constant bool (1)))
+    (declare (temporary) float return_value)
+    (declare (temporary) bool return_flag)
+    (assign (x) (var_ref return_flag) (constant bool (0)))
+    (if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((assign (x) (var_ref return_value) (constant float (1.000000)))
+      (assign (x) (var_ref return_flag) (constant bool (1)))
+      (assign (x) (var_ref execute_flag) (constant bool (0))))
+     ((assign (x) (var_ref return_value) (constant float (2.000000)))
+      (assign (x) (var_ref return_flag) (constant bool (1)))
+      (assign (x) (var_ref execute_flag) (constant bool (0)))))
+    (return (var_ref return_value))))))
diff --git a/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test b/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test
new file mode 100755
index 00000000000..5f97bfd3f5a
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that do_lower_jumps respects the lower_main_return
+# flag in deciding whether to lower returns in the main
+# function.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 0)' <<EOF
+((declare (in) float a) (declare (in) float b)
+ (function main
+  (signature void (parameters)
+   ((if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((return))
+       ()))
+     ())))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test.expected
new file mode 100644
index 00000000000..e8b36f14478
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_main_false.opt_test.expected
@@ -0,0 +1,8 @@
+((declare (in) float a) (declare (in) float b)
+ (function main
+  (signature void (parameters)
+   ((if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((return))
+       ()))
+     ())))))
diff --git a/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test b/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test
new file mode 100755
index 00000000000..59c7ba1dd52
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that do_lower_jumps respects the lower_main_return
+# flag in deciding whether to lower returns in the main
+# function.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 1, 0, 0)' <<EOF
+((declare (in) float a) (declare (in) float b)
+ (function main
+  (signature void (parameters)
+   ((if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((return))
+       ()))
+     ())))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test.expected
new file mode 100644
index 00000000000..e15a97d1db2
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_main_true.opt_test.expected
@@ -0,0 +1,13 @@
+((declare (in) float a) (declare (in) float b)
+ (function main
+  (signature void (parameters)
+   ((declare (temporary) bool execute_flag)
+    (assign (x) (var_ref execute_flag) (constant bool (1)))
+    (declare (temporary) bool return_flag)
+    (assign (x) (var_ref return_flag) (constant bool (0)))
+    (if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((assign (x) (var_ref return_flag) (constant bool (1)))
+        (assign (x) (var_ref execute_flag) (constant bool (0))))
+       ()))
+     ())))))
diff --git a/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test b/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test
new file mode 100755
index 00000000000..40e784e3318
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test
@@ -0,0 +1,16 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that do_lower_jumps respects the lower_sub_return flag
+# in deciding whether to lower returns in subroutines.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 0)' <<EOF
+((declare (in) float a) (declare (in) float b)
+ (function sub
+  (signature void (parameters)
+   ((if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((return))
+       ()))
+     ())))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test.expected
new file mode 100644
index 00000000000..07db6e708f4
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_sub_false.opt_test.expected
@@ -0,0 +1,8 @@
+((declare (in) float a) (declare (in) float b)
+ (function sub
+  (signature void (parameters)
+   ((if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((return))
+       ()))
+     ())))))
diff --git a/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test b/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test
new file mode 100755
index 00000000000..9fe6b90f085
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test
@@ -0,0 +1,16 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that do_lower_jumps respects the lower_sub_return flag
+# in deciding whether to lower returns in subroutines.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 1, 0, 0, 0)' <<EOF
+((declare (in) float a) (declare (in) float b)
+ (function sub
+  (signature void (parameters)
+   ((if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((return))
+       ()))
+     ())))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test.expected b/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test.expected
new file mode 100644
index 00000000000..31109802351
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_returns_sub_true.opt_test.expected
@@ -0,0 +1,13 @@
+((declare (in) float a) (declare (in) float b)
+ (function sub
+  (signature void (parameters)
+   ((declare (temporary) bool execute_flag)
+    (assign (x) (var_ref execute_flag) (constant bool (1)))
+    (declare (temporary) bool return_flag)
+    (assign (x) (var_ref return_flag) (constant bool (0)))
+    (if (expression bool > (var_ref a) (constant float (0.000000)))
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((assign (x) (var_ref return_flag) (constant bool (1)))
+        (assign (x) (var_ref execute_flag) (constant bool (0))))
+       ()))
+     ())))))
diff --git a/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test b/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test
new file mode 100755
index 00000000000..e7168131487
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test
@@ -0,0 +1,26 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# If both branches of an if statement end in a return, and
+# pull_out_jumps is True, then those returns should be lifted
+# outside the if and then properly lowered.
+# Verify that this lowering occurs during the same pass as the
+# lowering of other returns by checking that extra temporary
+# variables aren't generated.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(1, 0, 1, 0, 0)' <<EOF
+((declare (in) float aa) (declare (in) float ab) (declare (in) float b)
+ (declare (in) float c)
+ (function main
+  (signature void (parameters)
+   ((if (expression bool > (var_ref aa) (constant float (0.000000)))
+     ((if (expression bool > (var_ref ab) (constant float (0.000000)))
+       ((return))
+       ()))
+     ())
+    (if (expression bool > (var_ref b) (constant float (0.000000)))
+     ((if (expression bool > (var_ref c) (constant float (0.000000)))
+       ((return))
+       ((return))))
+     ())))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test.expected b/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test.expected
new file mode 100644
index 00000000000..271cd3b462e
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/lower_unified_returns.opt_test.expected
@@ -0,0 +1,21 @@
+((declare (in) float aa) (declare (in) float ab) (declare (in) float b)
+ (declare (in) float c)
+ (function main
+  (signature void (parameters)
+   ((declare (temporary) bool execute_flag)
+    (assign (x) (var_ref execute_flag) (constant bool (1)))
+    (declare (temporary) bool return_flag)
+    (assign (x) (var_ref return_flag) (constant bool (0)))
+    (if (expression bool > (var_ref aa) (constant float (0.000000)))
+     ((if (expression bool > (var_ref ab) (constant float (0.000000)))
+       ((assign (x) (var_ref return_flag) (constant bool (1)))
+        (assign (x) (var_ref execute_flag) (constant bool (0))))
+       ()))
+     ())
+    (if (var_ref execute_flag)
+     ((if (expression bool > (var_ref b) (constant float (0.000000)))
+       ((if (expression bool > (var_ref c) (constant float (0.000000))) () ())
+        (assign (x) (var_ref return_flag) (constant bool (1)))
+        (assign (x) (var_ref execute_flag) (constant bool (0))))
+       ()))
+     ())))))
diff --git a/src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test b/src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test
new file mode 100755
index 00000000000..18efc37f6e1
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test
@@ -0,0 +1,13 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that a redundant continue-statement at the end of a
+# loop is removed.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 0)' <<EOF
+((declare (out) float a)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000))) continue))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test.expected b/src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test.expected
new file mode 100644
index 00000000000..d2a02c6f380
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/remove_continue_at_end_of_loop.opt_test.expected
@@ -0,0 +1,5 @@
+((declare (out) float a)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))))))))
diff --git a/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_nothing.opt_test b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_nothing.opt_test
new file mode 100755
index 00000000000..79c0e824512
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_nothing.opt_test
@@ -0,0 +1,16 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that a non-void return at the end of a loop is
+# properly lowered.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 0)' <<EOF
+((declare (out) float a) (declare (out) float b)
+ (function sub
+  (signature float (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (return (constant float (2.000000)))))
+    (assign (x) (var_ref b) (constant float (3.000000)))
+    (return (constant float (4.000000)))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_nothing.opt_test.expected b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_nothing.opt_test.expected
new file mode 100644
index 00000000000..2cf117a5ee1
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_nothing.opt_test.expected
@@ -0,0 +1,8 @@
+((declare (out) float a) (declare (out) float b)
+ (function sub
+  (signature float (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (return (constant float (2.000000)))))
+    (assign (x) (var_ref b) (constant float (3.000000)))
+    (return (constant float (4.000000)))))))
diff --git a/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return.opt_test b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return.opt_test
new file mode 100755
index 00000000000..920d2ad9fba
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return.opt_test
@@ -0,0 +1,16 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that a non-void return at the end of a loop is
+# properly lowered.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 1, 0, 0, 0)' <<EOF
+((declare (out) float a) (declare (out) float b)
+ (function sub
+  (signature float (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (return (constant float (2.000000)))))
+    (assign (x) (var_ref b) (constant float (3.000000)))
+    (return (constant float (4.000000)))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return.opt_test.expected b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return.opt_test.expected
new file mode 100644
index 00000000000..0bab8f16f30
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return.opt_test.expected
@@ -0,0 +1,19 @@
+((declare (out) float a) (declare (out) float b)
+ (function sub
+  (signature float (parameters)
+   ((declare (temporary) bool execute_flag)
+    (assign (x) (var_ref execute_flag) (constant bool (1)))
+    (declare (temporary) float return_value)
+    (declare (temporary) bool return_flag)
+    (assign (x) (var_ref return_flag) (constant bool (0)))
+    (loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (assign (x) (var_ref return_value) (constant float (2.000000)))
+      (assign (x) (var_ref return_flag) (constant bool (1)))
+      break))
+    (if (var_ref return_flag) ()
+     ((assign (x) (var_ref b) (constant float (3.000000)))
+      (assign (x) (var_ref return_value) (constant float (4.000000)))
+      (assign (x) (var_ref return_flag) (constant bool (1)))
+      (assign (x) (var_ref execute_flag) (constant bool (0)))))
+    (return (var_ref return_value))))))
diff --git a/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return_and_break.opt_test b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return_and_break.opt_test
new file mode 100755
index 00000000000..99f1f863506
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return_and_break.opt_test
@@ -0,0 +1,16 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that a non-void return at the end of a loop is
+# properly lowered.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 1, 0, 0, 1)' <<EOF
+((declare (out) float a) (declare (out) float b)
+ (function sub
+  (signature float (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (return (constant float (2.000000)))))
+    (assign (x) (var_ref b) (constant float (3.000000)))
+    (return (constant float (4.000000)))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return_and_break.opt_test.expected b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return_and_break.opt_test.expected
new file mode 100644
index 00000000000..0bab8f16f30
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_non_void_at_end_of_loop_lower_return_and_break.opt_test.expected
@@ -0,0 +1,19 @@
+((declare (out) float a) (declare (out) float b)
+ (function sub
+  (signature float (parameters)
+   ((declare (temporary) bool execute_flag)
+    (assign (x) (var_ref execute_flag) (constant bool (1)))
+    (declare (temporary) float return_value)
+    (declare (temporary) bool return_flag)
+    (assign (x) (var_ref return_flag) (constant bool (0)))
+    (loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (assign (x) (var_ref return_value) (constant float (2.000000)))
+      (assign (x) (var_ref return_flag) (constant bool (1)))
+      break))
+    (if (var_ref return_flag) ()
+     ((assign (x) (var_ref b) (constant float (3.000000)))
+      (assign (x) (var_ref return_value) (constant float (4.000000)))
+      (assign (x) (var_ref return_flag) (constant bool (1)))
+      (assign (x) (var_ref execute_flag) (constant bool (0)))))
+    (return (var_ref return_value))))))
diff --git a/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_nothing.opt_test b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_nothing.opt_test
new file mode 100755
index 00000000000..63487d32691
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_nothing.opt_test
@@ -0,0 +1,14 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that a return of void at the end of a loop is properly
+# lowered.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 0, 0, 0)' <<EOF
+((declare (out) float a) (declare (out) float b)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000))) (return)))
+    (assign (x) (var_ref b) (constant float (2.000000)))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_nothing.opt_test.expected b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_nothing.opt_test.expected
new file mode 100644
index 00000000000..0bd8037bf00
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_nothing.opt_test.expected
@@ -0,0 +1,6 @@
+((declare (out) float a) (declare (out) float b)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000))) (return)))
+    (assign (x) (var_ref b) (constant float (2.000000)))))))
diff --git a/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return.opt_test b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return.opt_test
new file mode 100755
index 00000000000..523c92a686d
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return.opt_test
@@ -0,0 +1,14 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that a return of void at the end of a loop is properly
+# lowered.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 1, 0, 0)' <<EOF
+((declare (out) float a) (declare (out) float b)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000))) (return)))
+    (assign (x) (var_ref b) (constant float (2.000000)))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return.opt_test.expected b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return.opt_test.expected
new file mode 100644
index 00000000000..53814eaacad
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return.opt_test.expected
@@ -0,0 +1,11 @@
+((declare (out) float a) (declare (out) float b)
+ (function main
+  (signature void (parameters)
+   ((declare (temporary) bool return_flag)
+    (assign (x) (var_ref return_flag) (constant bool (0)))
+    (loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (assign (x) (var_ref return_flag) (constant bool (1)))
+      break))
+    (if (var_ref return_flag) ()
+     ((assign (x) (var_ref b) (constant float (2.000000)))))))))
diff --git a/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return_and_break.opt_test b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return_and_break.opt_test
new file mode 100755
index 00000000000..22b5581cbda
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return_and_break.opt_test
@@ -0,0 +1,14 @@
+#!/bin/bash
+#
+# This file was generated by create_test_cases.py.
+#
+# Test that a return of void at the end of a loop is properly
+# lowered.
+../../glsl_test optpass --quiet --input-ir 'do_lower_jumps(0, 0, 1, 0, 1)' <<EOF
+((declare (out) float a) (declare (out) float b)
+ (function main
+  (signature void (parameters)
+   ((loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000))) (return)))
+    (assign (x) (var_ref b) (constant float (2.000000)))))))
+EOF
diff --git a/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return_and_break.opt_test.expected b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return_and_break.opt_test.expected
new file mode 100644
index 00000000000..53814eaacad
--- /dev/null
+++ b/src/glsl/tests/lower_jumps/return_void_at_end_of_loop_lower_return_and_break.opt_test.expected
@@ -0,0 +1,11 @@
+((declare (out) float a) (declare (out) float b)
+ (function main
+  (signature void (parameters)
+   ((declare (temporary) bool return_flag)
+    (assign (x) (var_ref return_flag) (constant bool (0)))
+    (loop () () () ()
+     ((assign (x) (var_ref a) (constant float (1.000000)))
+      (assign (x) (var_ref return_flag) (constant bool (1)))
+      break))
+    (if (var_ref return_flag) ()
+     ((assign (x) (var_ref b) (constant float (2.000000)))))))))
diff --git a/src/glsl/tests/optimization-test b/src/glsl/tests/optimization-test
new file mode 100755
index 00000000000..0c130be1379
--- /dev/null
+++ b/src/glsl/tests/optimization-test
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+total=0
+pass=0
+
+echo "====== Testing optimization passes ======"
+for test in `find . -iname '*.opt_test'`; do
+    echo -n "Testing $test..."
+    (cd `dirname "$test"`; ./`basename "$test"`) > "$test.out" 2>&1
+    total=$((total+1))
+    if ./compare_ir "$test.expected" "$test.out" >/dev/null 2>&1; then
+        echo "PASS"
+        pass=$((pass+1))
+    else
+        echo "FAIL"
+        ./compare_ir "$test.expected" "$test.out"
+    fi
+done
+
+echo ""
+echo "$pass/$total tests returned correct results"
+echo ""
+
+if [[ $pass == $total ]]; then
+    exit 0
+else
+    exit 1
+fi
diff --git a/src/glsl/tests/sexps.py b/src/glsl/tests/sexps.py
new file mode 100644
index 00000000000..a714af8d236
--- /dev/null
+++ b/src/glsl/tests/sexps.py
@@ -0,0 +1,103 @@
+# coding=utf-8
+#
+# Copyright © 2011 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# This file contains helper functions for manipulating sexps in Python.
+#
+# We represent a sexp in Python using nested lists containing strings.
+# So, for example, the sexp (constant float (1.000000)) is represented
+# as ['constant', 'float', ['1.000000']].
+
+import re
+
+def check_sexp(sexp):
+    """Verify that the argument is a proper sexp.
+
+    That is, raise an exception if the argument is not a string or a
+    list, or if it contains anything that is not a string or a list at
+    any nesting level.
+    """
+    if isinstance(sexp, list):
+        for s in sexp:
+            check_sexp(s)
+    elif not isinstance(sexp, basestring):
+        raise Exception('Not a sexp: {0!r}'.format(sexp))
+
+def parse_sexp(sexp):
+    """Convert a string, of the form that would be output by mesa,
+    into a sexp represented as nested lists containing strings.
+    """
+    sexp_token_regexp = re.compile(
+        '[a-zA-Z_]+(@[0-9]+)?|[0-9]+(\\.[0-9]+)?|[^ \n]')
+    stack = [[]]
+    for match in sexp_token_regexp.finditer(sexp):
+        token = match.group(0)
+        if token == '(':
+            stack.append([])
+        elif token == ')':
+            if len(stack) == 1:
+                raise Exception('Unmatched )')
+            sexp = stack.pop()
+            stack[-1].append(sexp)
+        else:
+            stack[-1].append(token)
+    if len(stack) != 1:
+        raise Exception('Unmatched (')
+    if len(stack[0]) != 1:
+        raise Exception('Multiple sexps')
+    return stack[0][0]
+
+def sexp_to_string(sexp):
+    """Convert a sexp, represented as nested lists containing strings,
+    into a single string of the form parseable by mesa.
+    """
+    if isinstance(sexp, basestring):
+        return sexp
+    assert isinstance(sexp, list)
+    result = ''
+    for s in sexp:
+        sub_result = sexp_to_string(s)
+        if result == '':
+            result = sub_result
+        elif '\n' not in result and '\n' not in sub_result and \
+                len(result) + len(sub_result) + 1 <= 70:
+            result += ' ' + sub_result
+        else:
+            result += '\n' + sub_result
+    return '({0})'.format(result.replace('\n', '\n '))
+
+def sort_decls(sexp):
+    """Sort all toplevel variable declarations in sexp.
+
+    This is used to work around the fact that
+    ir_reader::read_instructions reorders declarations.
+    """
+    assert isinstance(sexp, list)
+    decls = []
+    other_code = []
+    for s in sexp:
+        if isinstance(s, list) and len(s) >= 4 and s[0] == 'declare':
+            decls.append(s)
+        else:
+            other_code.append(s)
+    return sorted(decls) + other_code
+

From edaadd94cbae45dc3c1c5481e346f729ea2bc73b Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Fri, 22 Jul 2011 10:38:05 -0700
Subject: [PATCH 047/600] glsl: Add standalone_scaffolding.cpp to SConscript.

---
 src/glsl/SConscript | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/glsl/SConscript b/src/glsl/SConscript
index ea104abb823..1da58a91f91 100644
--- a/src/glsl/SConscript
+++ b/src/glsl/SConscript
@@ -96,6 +96,7 @@ glsl_sources = [
     'opt_tree_grafting.cpp',
     'ralloc.c',
     's_expression.cpp',
+    'standalone_scaffolding.cpp',
     'strtod.c',
 ] 
 

From eb7590f6772db844d95ed4937ab7c98a3f412a28 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= <jfonseca@vmware.com>
Date: Tue, 19 Jul 2011 15:58:21 -0700
Subject: [PATCH 048/600] util: Store alpha value too.

---
 src/gallium/auxiliary/util/u_debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index 004df439ff5..2d6193039a7 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -730,7 +730,7 @@ debug_dump_float_rgba_bmp(const char *filename,
          pixel.rgbRed   = float_to_ubyte(ptr[x*4 + 0]);
          pixel.rgbGreen = float_to_ubyte(ptr[x*4 + 1]);
          pixel.rgbBlue  = float_to_ubyte(ptr[x*4 + 2]);
-         pixel.rgbAlpha = 255;
+         pixel.rgbAlpha = float_to_ubyte(ptr[x*4 + 3]);
          os_stream_write(stream, &pixel, 4);
       }
    }

From 0a1d49504de4d34b003625ee7c901667afa43dea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= <jfonseca@vmware.com>
Date: Wed, 20 Jul 2011 14:39:23 -0700
Subject: [PATCH 049/600] llvmpipe: Unit tests for arithmetic functions.

Conflicts:

	src/gallium/drivers/llvmpipe/SConscript
---
 src/gallium/drivers/llvmpipe/Makefile       |   1 +
 src/gallium/drivers/llvmpipe/SConscript     |   5 +-
 src/gallium/drivers/llvmpipe/lp_test_arit.c | 294 ++++++++++++++++++++
 3 files changed, 298 insertions(+), 2 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_test_arit.c

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index ba9705bebee..f9301354fc5 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -51,6 +51,7 @@ C_SOURCES = \
 CPP_SOURCES = \
 
 PROGS := lp_test_format	\
+	 lp_test_arit	\
 	 lp_test_blend	\
 	 lp_test_conv	\
 	 lp_test_printf \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index d6b20ceb5ce..2b232a524ae 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -85,11 +85,12 @@ if not env['embedded']:
     env.Prepend(LIBS = [llvmpipe] + gallium)
 
     tests = [
+        'arit',
         'format',
         'blend',
         'conv',
-	'printf',
-	'sincos',
+        'printf',
+        'sincos',
     ]
 
     if not env['msvc']:
diff --git a/src/gallium/drivers/llvmpipe/lp_test_arit.c b/src/gallium/drivers/llvmpipe/lp_test_arit.c
new file mode 100644
index 00000000000..f0e43e0f9cc
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_test_arit.c
@@ -0,0 +1,294 @@
+/**************************************************************************
+ *
+ * Copyright 2011 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "util/u_pointer.h"
+#include "util/u_memory.h"
+
+#include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_arit.h"
+
+#include "lp_test.h"
+
+
+void
+write_tsv_header(FILE *fp)
+{
+   fprintf(fp,
+           "result\t"
+           "format\n");
+
+   fflush(fp);
+}
+
+
+typedef float (*unary_func_t)(float);
+
+
+/**
+ * Describe a test case of one unary function.
+ */
+struct unary_test_t
+{
+   /*
+    * Test name -- name of the mathematical function under test.
+    */
+
+   const char *name;
+
+   LLVMValueRef
+   (*builder)(struct lp_build_context *bld, LLVMValueRef a);
+
+   /*
+    * Reference (pure-C) function.
+    */
+   float
+   (*ref)(float a);
+
+   /*
+    * Test values.
+    */
+   const float *values;
+   unsigned num_values;
+};
+
+
+const float exp2_values[] = {
+   -60,
+   -4,
+   -2,
+   -1,
+   -1e-007,
+   0,
+   1e-007,
+   1, 
+   2, 
+   4, 
+   60
+};
+
+
+const float log2_values[] = {
+#if 0
+   /* 
+    * Smallest denormalized number; meant just for experimentation, but not
+    * validation.
+    */
+   1.4012984643248171e-45,
+#endif
+   1e-007,
+   0.5,
+   1,
+   2,
+   4,
+   100000,
+   1e+018
+};
+
+
+static float rsqrtf(float x)
+{
+   return 1.0/sqrt(x);
+}
+
+
+const float rsqrt_values[] = {
+   -1, -1e-007,
+   1e-007, 1,
+   -4, -1,
+   1, 4,
+   -1e+035, -100000,
+   100000, 1e+035,
+};
+
+
+const float sincos_values[] = {
+   -5*M_PI/4,
+   -4*M_PI/4,
+   -4*M_PI/4,
+   -3*M_PI/4,
+   -2*M_PI/4,
+   -1*M_PI/4,
+    1*M_PI/4,
+    2*M_PI/4,
+    3*M_PI/4,
+    4*M_PI/4,
+    5*M_PI/4,
+};
+
+
+/*
+ * Unary test cases.
+ */
+
+static const struct unary_test_t unary_tests[] = {
+   {"exp2", &lp_build_exp2, &exp2f, exp2_values, Elements(exp2_values)},
+   {"log2", &lp_build_log2, &log2f, log2_values, Elements(log2_values)},
+   {"exp", &lp_build_exp, &expf, exp2_values, Elements(exp2_values)},
+   {"log", &lp_build_log, &logf, log2_values, Elements(log2_values)},
+   {"rsqrt", &lp_build_rsqrt, &rsqrtf, rsqrt_values, Elements(rsqrt_values)},
+   {"sin", &lp_build_sin, &sinf, sincos_values, Elements(sincos_values)},
+   {"cos", &lp_build_cos, &cosf, sincos_values, Elements(sincos_values)},
+};
+
+
+/*
+ * Build LLVM function that exercises the unary operator builder.
+ */
+static LLVMValueRef
+build_unary_test_func(struct gallivm_state *gallivm,
+                      LLVMModuleRef module,
+                      LLVMContextRef context,
+                      const struct unary_test_t *test)
+{
+   LLVMTypeRef i32t = LLVMInt32TypeInContext(context);
+   LLVMTypeRef f32t = LLVMFloatTypeInContext(context);
+   LLVMTypeRef v4f32t = LLVMVectorType(f32t, 4);
+   LLVMTypeRef args[1] = { f32t };
+   LLVMValueRef func = LLVMAddFunction(module, test->name, LLVMFunctionType(f32t, args, Elements(args), 0));
+   LLVMValueRef arg1 = LLVMGetParam(func, 0);
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMBasicBlockRef block = LLVMAppendBasicBlockInContext(context, func, "entry");
+   LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
+   LLVMValueRef ret;
+
+   struct lp_build_context bld;
+
+   lp_build_context_init(&bld, gallivm, lp_float32_vec4_type());
+
+   LLVMSetFunctionCallConv(func, LLVMCCallConv);
+
+   LLVMPositionBuilderAtEnd(builder, block);
+   
+   /* scalar to vector */
+   arg1 = LLVMBuildInsertElement(builder, LLVMGetUndef(v4f32t), arg1, index0, "");
+
+   ret = test->builder(&bld, arg1);
+   
+   /* vector to scalar */
+   ret = LLVMBuildExtractElement(builder, ret, index0, "");
+
+   LLVMBuildRet(builder, ret);
+   return func;
+}
+
+
+/*
+ * Test one LLVM unary arithmetic builder function.
+ */
+static boolean
+test_unary(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, const struct unary_test_t *test)
+{
+   LLVMModuleRef module = gallivm->module;
+   LLVMValueRef test_func;
+   LLVMExecutionEngineRef engine = gallivm->engine;
+   LLVMContextRef context = gallivm->context;
+   char *error = NULL;
+   unary_func_t test_func_jit;
+   boolean success = TRUE;
+   int i;
+
+   test_func = build_unary_test_func(gallivm, module, context, test);
+
+   if (LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
+      printf("LLVMVerifyModule: %s\n", error);
+      LLVMDumpModule(module);
+      abort();
+   }
+   LLVMDisposeMessage(error);
+
+   test_func_jit = (unary_func_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_func));
+
+   for (i = 0; i < test->num_values; ++i) {
+      float value = test->values[i];
+      float ref = test->ref(value);
+      float src = test_func_jit(value);
+
+      double error = fabs(src - ref);
+      double precision = error ? -log2(error/fabs(ref)) : FLT_MANT_DIG;
+
+      bool pass = precision >= 20.0;
+
+      if (isnan(ref)) {
+         continue;
+      }
+
+      if (!pass || verbose) {
+         printf("%s(%.9g): ref = %.9g, src = %.9g, precision = %f bits, %s\n",
+               test->name, value, ref, src, precision,
+               pass ? "PASS" : "FAIL");
+      }
+
+      if (!pass) {
+         success = FALSE;
+      }
+   }
+
+   LLVMFreeMachineCodeForFunction(engine, test_func);
+
+   return success;
+}
+
+
+boolean
+test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
+{
+   boolean success = TRUE;
+   int i;
+
+   for (i = 0; i < Elements(unary_tests); ++i) {
+      if (!test_unary(gallivm, verbose, fp, &unary_tests[i])) {
+         success = FALSE;
+      }
+   }
+
+   return success;
+}
+
+
+boolean
+test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
+          unsigned long n)
+{
+   /*
+    * Not randomly generated test cases, so test all.
+    */
+
+   return test_all(gallivm, verbose, fp);
+}
+
+
+boolean
+test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
+{
+   return TRUE;
+}

From 1ac86e249e38b163a3c3cc1915e7de7877c08fb5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= <jfonseca@vmware.com>
Date: Tue, 19 Jul 2011 15:58:09 -0700
Subject: [PATCH 050/600] gallivm: Fix lp_build_exp/lp_build_log.

Never used so far -- we only used the base 2 variants -- which is why
it went unnoticed so far.
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 02b3bde7893..06e5debe4a3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -2151,7 +2151,7 @@ lp_build_exp(struct lp_build_context *bld,
 
    assert(lp_check_value(bld->type, x));
 
-   return lp_build_mul(bld, log2e, lp_build_exp2(bld, x));
+   return lp_build_exp2(bld, lp_build_mul(bld, log2e, x));
 }
 
 
@@ -2168,7 +2168,7 @@ lp_build_log(struct lp_build_context *bld,
 
    assert(lp_check_value(bld->type, x));
 
-   return lp_build_mul(bld, log2, lp_build_exp2(bld, x));
+   return lp_build_mul(bld, log2, lp_build_log2(bld, x));
 }
 
 

From ef1a2765a45c03b3bf7b5994197a611bcef96e0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= <jfonseca@vmware.com>
Date: Wed, 20 Jul 2011 14:34:46 -0700
Subject: [PATCH 051/600] gallivm: Update minimax comments.

---
 src/gallium/auxiliary/gallivm/f.cpp | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/f.cpp b/src/gallium/auxiliary/gallivm/f.cpp
index 5eb09c01ab3..6b9c35b3ce5 100644
--- a/src/gallium/auxiliary/gallivm/f.cpp
+++ b/src/gallium/auxiliary/gallivm/f.cpp
@@ -15,8 +15,9 @@
  *
  * How to use this source:
  *
- * - Download and abuild the NTL library from
- *   http://shoup.net/ntl/download.html
+ * - Download and build the NTL library from
+ *   http://shoup.net/ntl/download.html , or install libntl-dev package if on
+ *   Debian.
  *
  * - Download boost source code matching to your distro. 
  *
@@ -24,22 +25,32 @@
  *
  * - Build as
  *
- *   g++ -o minimax -I /path/to/ntl/include main.cpp f.cpp /path/to/ntl/src/ntl.a -lboost_math_tr1
+ *   g++ -o minimax -I /path/to/ntl/include main.cpp f.cpp /path/to/ntl/src/ntl.a
  *
  * - Run as 
  *
  *    ./minimax
  *
- * - For example, to compute exp2 5th order polynomial between [0, 1] do:
+ * - For example, to compute log2 5th order polynomial between [1, 2] do:
+ *
+ *    variant 0
+ *    range 1 2
+ *    order 5 0
+ *    step 200
+ *    info
+ *
+ *  and take the coefficients from the P = { ... } array.
+ *
+ * - To compute exp2 5th order polynomial between [0, 1] do:
  *
  *    variant 1
  *    range 0 1
  *    order 5 0
- *    steps 200
+ *    step 200
  *    info
  *
  * - For more info see
- * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html
+ * http://www.boost.org/doc/libs/1_47_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html
  */
 
 #define L22

From 47d6d44a231b811f1bba05478a6bbfb1e3fdb27b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= <jfonseca@vmware.com>
Date: Wed, 20 Jul 2011 14:41:17 -0700
Subject: [PATCH 052/600] gallivm: Increase lp_build_rsqrt() precision.

Add an iteration step, which makes rqsqrt precision go from 12bits to
24, and fixes RSQ/NRM test case of PSPrecision/VSPrevision DCTs.

There are no uses of this function outside shader translation.
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 06e5debe4a3..fce4685cc2d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1645,7 +1645,7 @@ lp_build_rsqrt(struct lp_build_context *bld,
    assert(type.floating);
 
    if (util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
-      const unsigned num_iterations = 0;
+      const unsigned num_iterations = 1;
       LLVMValueRef res;
       unsigned i;
 

From af82ff556cdd748f9f6b0d60d53afaaf369b1c5b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= <jfonseca@vmware.com>
Date: Wed, 20 Jul 2011 14:53:08 -0700
Subject: [PATCH 053/600] gallivm: Fix lp_build_exp2 order 4-5 polynomial
 coefficients and bump order.

Not sure how I computed these, but they were wrong (which explains why
bumping the polynomial order before never improved precision).

This allows to pass the EXP test cases of PSPrecision/VSPrecision DCTs.
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c | 24 ++++++++++-----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index fce4685cc2d..0c075bafb2c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -61,7 +61,7 @@
 #include "lp_bld_arit.h"
 
 
-#define EXP_POLY_DEGREE 3
+#define EXP_POLY_DEGREE 5
 
 #define LOG_POLY_DEGREE 5
 
@@ -2218,18 +2218,18 @@ lp_build_polynomial(struct lp_build_context *bld,
  */
 const double lp_build_exp2_polynomial[] = {
 #if EXP_POLY_DEGREE == 5
-   0.999999999690134838155,
-   0.583974334321735217258,
-   0.164553105719676828492,
-   0.0292811063701710962255,
-   0.00354944426657875141846,
-   0.000296253726543423377365
+   0.999999925063526176901,
+   0.693153073200168932794,
+   0.240153617044375388211,
+   0.0558263180532956664775,
+   0.00898934009049466391101,
+   0.00187757667519147912699
 #elif EXP_POLY_DEGREE == 4
-   1.00000001502262084505,
-   0.563586057338685991394,
-   0.150436017652442413623,
-   0.0243220604213317927308,
-   0.0025359088446580436489
+   1.00000259337069434683,
+   0.693003834469974940458,
+   0.24144275689150793076,
+   0.0520114606103070150235,
+   0.0135341679161270268764
 #elif EXP_POLY_DEGREE == 3
    0.999925218562710312959,
    0.695833540494823811697,

From 5161aff48af2fe0171be06fc727a000ad300fbd9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= <jfonseca@vmware.com>
Date: Wed, 20 Jul 2011 14:53:59 -0700
Subject: [PATCH 054/600] gallivm: Add a note about log2 computation and
 denormalized numbers.

---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 0c075bafb2c..2be8598704e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -2465,6 +2465,12 @@ lp_build_log2_approx(struct lp_build_context *bld,
 
       assert(type.floating && type.width == 32);
 
+      /* 
+       * We don't explicitly handle denormalized numbers. They will yield a
+       * result in the neighbourhood of -127, which appears to be adequate
+       * enough.
+       */
+
       i = LLVMBuildBitCast(builder, x, int_vec_type, "");
 
       /* exp = (float) exponent(x) */

From 8d5f3cef795428d7a95120416122a39c10cff11c Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 18 Jul 2011 09:51:34 -0700
Subject: [PATCH 055/600] glsl: Move is_array_or_matrix outside visitor class

There's no reason for it to be there, and another class that may not
have access to the visitor will need it soon.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/glsl/lower_variable_index_to_cond_assign.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp
index 8eb1612f0a0..45adb267f2c 100644
--- a/src/glsl/lower_variable_index_to_cond_assign.cpp
+++ b/src/glsl/lower_variable_index_to_cond_assign.cpp
@@ -37,6 +37,12 @@
 #include "glsl_types.h"
 #include "main/macros.h"
 
+static inline bool
+is_array_or_matrix(const ir_instruction *ir)
+{
+   return (ir->type->is_array() || ir->type->is_matrix());
+}
+
 struct assignment_generator
 {
    ir_instruction* base_ir;
@@ -233,11 +239,6 @@ public:
    bool lower_temps;
    bool lower_uniforms;
 
-   bool is_array_or_matrix(const ir_instruction *ir) const
-   {
-      return (ir->type->is_array() || ir->type->is_matrix());
-   }
-
    bool needs_lowering(ir_dereference_array *deref) const
    {
       if (deref == NULL || deref->array_index->as_constant()

From d2296e784aa8fad81c4910dcbbf61e826ce9a06a Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 18 Jul 2011 10:07:24 -0700
Subject: [PATCH 056/600] glsl: Split out part of
 variable_index_to_cond_assign_visitor::needs_lowering

Other code will soon need to know if an array needs lowering based
exclusively on the storage mode.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/glsl/lower_variable_index_to_cond_assign.cpp | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp
index 45adb267f2c..c0b69c8f0b1 100644
--- a/src/glsl/lower_variable_index_to_cond_assign.cpp
+++ b/src/glsl/lower_variable_index_to_cond_assign.cpp
@@ -239,12 +239,8 @@ public:
    bool lower_temps;
    bool lower_uniforms;
 
-   bool needs_lowering(ir_dereference_array *deref) const
+   bool storage_type_needs_lowering(ir_dereference_array *deref) const
    {
-      if (deref == NULL || deref->array_index->as_constant()
-	  || !is_array_or_matrix(deref->array))
-	 return false;
-
       if (deref->array->ir_type == ir_type_constant)
 	 return this->lower_temps;
 
@@ -268,6 +264,15 @@ public:
       return false;
    }
 
+   bool needs_lowering(ir_dereference_array *deref) const
+   {
+      if (deref == NULL || deref->array_index->as_constant()
+	  || !is_array_or_matrix(deref->array))
+	 return false;
+
+      return this->storage_type_needs_lowering(deref);
+   }
+
    ir_variable *convert_dereference_array(ir_dereference_array *orig_deref,
 					  ir_assignment* orig_assign)
    {

From 1731ac308631138ca98d34e8b7070c6e3f981939 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 18 Jul 2011 12:18:19 -0700
Subject: [PATCH 057/600] glsl: Rework lowering of non-constant array indexing

The previous implementation could easily get tricked if the LHS of an
assignment included a non-constant index that was "inside" another
dereference.  For example:

    mat4 m[2];
    m[0][i] = vec4(0.0);

Due to the way it tracked whether the array was being assigned, it
would think that the non-constant index was in an r-value.  The new
code fixes that by tracking l-values and r-values differently.  The
index is also replaced by cloning the IR and replacing the index
variable instead of the odd way it was done before.

v2: Apply some simplifications suggested by Eric Anholt.  Making
assignment_generator::rvalue be ir_dereference instead of ir_rvalue
simplified the code a bit.

Fixes i965 piglit fs-temp-array-mat[234]-index-wr and
vs-varying-array-mat[234]-index-wr.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=34691
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 .../lower_variable_index_to_cond_assign.cpp   | 135 +++++++++++++++---
 1 file changed, 116 insertions(+), 19 deletions(-)

diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp
index c0b69c8f0b1..107bcc67aed 100644
--- a/src/glsl/lower_variable_index_to_cond_assign.cpp
+++ b/src/glsl/lower_variable_index_to_cond_assign.cpp
@@ -29,6 +29,21 @@
  *
  * Pre-DX10 GPUs often don't have a native way to do this operation,
  * and this works around that.
+ *
+ * The lowering process proceeds as follows.  Each non-constant index
+ * found in an r-value is converted to a canonical form \c array[i].  Each
+ * element of the array is conditionally assigned to a temporary by comparing
+ * \c i to a constant index.  This is done by cloning the canonical form and
+ * replacing all occurances of \c i with a constant.  Each remaining occurance
+ * of the canonical form in the IR is replaced with a dereference of the
+ * temporary variable.
+ *
+ * L-values with non-constant indices are handled similarly.  In this case,
+ * the RHS of the assignment is assigned to a temporary.  The non-constant
+ * index is replace with the canonical form (just like for r-values).  The
+ * temporary is conditionally assigned to each element of the canonical form
+ * by comparing \c i with each index.  The same clone-and-replace scheme is
+ * used.
  */
 
 #include "ir.h"
@@ -43,10 +58,70 @@ is_array_or_matrix(const ir_instruction *ir)
    return (ir->type->is_array() || ir->type->is_matrix());
 }
 
+/**
+ * Replace a dereference of a variable with a specified r-value
+ *
+ * Each time a dereference of the specified value is replaced, the r-value
+ * tree is cloned.
+ */
+class deref_replacer : public ir_rvalue_visitor {
+public:
+   deref_replacer(const ir_variable *variable_to_replace, ir_rvalue *value)
+      : variable_to_replace(variable_to_replace), value(value),
+	progress(false)
+   {
+      assert(this->variable_to_replace != NULL);
+      assert(this->value != NULL);
+   }
+
+   virtual void handle_rvalue(ir_rvalue **rvalue)
+   {
+      ir_dereference_variable *const dv = (*rvalue)->as_dereference_variable();
+
+      if ((dv != NULL) && (dv->var == this->variable_to_replace)) {
+	 this->progress = true;
+	 *rvalue = this->value->clone(ralloc_parent(*rvalue), NULL);
+      }
+   }
+
+   const ir_variable *variable_to_replace;
+   ir_rvalue *value;
+   bool progress;
+};
+
+/**
+ * Find a variable index dereference of an array in an rvalue tree
+ */
+class find_variable_index : public ir_hierarchical_visitor {
+public:
+   find_variable_index()
+      : deref(NULL)
+   {
+      /* empty */
+   }
+
+   virtual ir_visitor_status visit_enter(ir_dereference_array *ir)
+   {
+      if (is_array_or_matrix(ir->array)
+	  && (ir->array_index->as_constant() == NULL)) {
+	 this->deref = ir;
+	 return visit_stop;
+      }
+
+      return visit_continue;
+   }
+
+   /**
+    * First array dereference found in the tree that has a non-constant index.
+    */
+   ir_dereference_array *deref;
+};
+
 struct assignment_generator
 {
    ir_instruction* base_ir;
-   ir_rvalue* array;
+   ir_dereference *rvalue;
+   ir_variable *old_index;
    bool is_write;
    unsigned int write_mask;
    ir_variable* var;
@@ -61,18 +136,23 @@ struct assignment_generator
        * underlying variable.
        */
       void *mem_ctx = ralloc_parent(base_ir);
-      ir_dereference *element =
-	 new(mem_ctx) ir_dereference_array(this->array->clone(mem_ctx, NULL),
-					   new(mem_ctx) ir_constant(i));
-      ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var);
 
-      ir_assignment *assignment;
-      if (is_write) {
-	 assignment = new(mem_ctx) ir_assignment(element, variable, condition,
-						 write_mask);
-      } else {
-	 assignment = new(mem_ctx) ir_assignment(variable, element, condition);
-      }
+      /* Clone the old r-value in its entirety.  Then replace any occurances of
+       * the old variable index with the new constant index.
+       */
+      ir_dereference *element = this->rvalue->clone(mem_ctx, NULL);
+      ir_constant *const index = new(mem_ctx) ir_constant(i);
+      deref_replacer r(this->old_index, index);
+      element->accept(&r);
+      assert(r.progress);
+
+      /* Generate a conditional assignment to (or from) the constant indexed
+       * array dereference.
+       */
+      ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var);
+      ir_assignment *const assignment = (is_write)
+	 ? new(mem_ctx) ir_assignment(element, variable, condition, write_mask)
+	 : new(mem_ctx) ir_assignment(variable, element, condition);
 
       list->push_tail(assignment);
    }
@@ -274,7 +354,8 @@ public:
    }
 
    ir_variable *convert_dereference_array(ir_dereference_array *orig_deref,
-					  ir_assignment* orig_assign)
+					  ir_assignment* orig_assign,
+					  ir_dereference *orig_base)
    {
       assert(is_array_or_matrix(orig_deref->array));
 
@@ -320,9 +401,12 @@ public:
 	 new(mem_ctx) ir_assignment(lhs, orig_deref->array_index, NULL);
       base_ir->insert_before(assign);
 
+      orig_deref->array_index = lhs->clone(mem_ctx, NULL);
+
       assignment_generator ag;
-      ag.array = orig_deref->array;
+      ag.rvalue = orig_base;
       ag.base_ir = base_ir;
+      ag.old_index = index;
       ag.var = var;
       if (orig_assign) {
 	 ag.is_write = true;
@@ -342,12 +426,16 @@ public:
 
    virtual void handle_rvalue(ir_rvalue **pir)
    {
+      if (this->in_assignee)
+	 return;
+
       if (!*pir)
          return;
 
       ir_dereference_array* orig_deref = (*pir)->as_dereference_array();
       if (needs_lowering(orig_deref)) {
-         ir_variable* var = convert_dereference_array(orig_deref, 0);
+         ir_variable *var =
+	    convert_dereference_array(orig_deref, NULL, orig_deref);
          assert(var);
          *pir = new(ralloc_parent(base_ir)) ir_dereference_variable(var);
          this->progress = true;
@@ -359,10 +447,11 @@ public:
    {
       ir_rvalue_visitor::visit_leave(ir);
 
-      ir_dereference_array *orig_deref = ir->lhs->as_dereference_array();
+      find_variable_index f;
+      ir->lhs->accept(&f);
 
-      if (needs_lowering(orig_deref)) {
-         convert_dereference_array(orig_deref, ir);
+      if ((f.deref != NULL) && storage_type_needs_lowering(f.deref)) {
+         convert_dereference_array(f.deref, ir, ir->lhs);
          ir->remove();
          this->progress = true;
       }
@@ -383,7 +472,15 @@ lower_variable_index_to_cond_assign(exec_list *instructions,
 					   lower_temp,
 					   lower_uniform);
 
-   visit_list_elements(&v, instructions);
+   /* Continue lowering until no progress is made.  If there are multiple
+    * levels of indirection (e.g., non-constant indexing of array elements and
+    * matrix columns of an array of matrix), each pass will only lower one
+    * level of indirection.
+    */
+   do {
+      v.progress = false;
+      visit_list_elements(&v, instructions);
+   } while (v.progress);
 
    return v.progress;
 }

From 5f83dfe5b70337bcffe215f7c32d0b862b5e7a3b Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 17 Jul 2011 17:33:26 -0700
Subject: [PATCH 058/600] glsl: When lowering non-constant array indexing,
 respect existing conditions

If the non-constant index was in the LHS of an assignment, any
existing condititon on that assignment would be lost.

Fixes i965 piglit:

    fs-temp-array-mat[234]-col-row-wr
    fs-temp-array-mat[234]-index-col-row-wr
    fs-temp-array-mat[234]-index-col-wr
    fs-temp-array-mat[234]-index-row-wr
    vs-varying-array-mat[234]-index-col-wr

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 .../lower_variable_index_to_cond_assign.cpp   | 21 ++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp
index 107bcc67aed..e08ec13456b 100644
--- a/src/glsl/lower_variable_index_to_cond_assign.cpp
+++ b/src/glsl/lower_variable_index_to_cond_assign.cpp
@@ -417,9 +417,24 @@ public:
 
       switch_generator sg(ag, index, 4, 4);
 
-      exec_list list;
-      sg.generate(0, length, &list);
-      base_ir->insert_before(&list);
+      /* If the original assignment has a condition, respect that original
+       * condition!  This is acomplished by wrapping the new conditional
+       * assignments in an if-statement that uses the original condition.
+       */
+      if ((orig_assign != NULL) && (orig_assign->condition != NULL)) {
+	 /* No need to clone the condition because the IR that it hangs on is
+	  * going to be removed from the instruction sequence.
+	  */
+	 ir_if *if_stmt = new(mem_ctx) ir_if(orig_assign->condition);
+
+	 sg.generate(0, length, &if_stmt->then_instructions);
+	 base_ir->insert_before(if_stmt);
+      } else {
+	 exec_list list;
+
+	 sg.generate(0, length, &list);
+	 base_ir->insert_before(&list);
+      }
 
       return var;
    }

From 601428d2bbcf650c746f7a10b47228948f0ea912 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 17 Jul 2011 17:35:00 -0700
Subject: [PATCH 059/600] glsl: When lowering non-constant vector indexing,
 respect existing conditions

If the non-constant index was in the LHS of an assignment, any
existing condititon on that assignment would be lost.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/glsl/lower_vec_index_to_cond_assign.cpp | 29 +++++++++++++++++----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/src/glsl/lower_vec_index_to_cond_assign.cpp b/src/glsl/lower_vec_index_to_cond_assign.cpp
index 3c4d93201d2..15992e27288 100644
--- a/src/glsl/lower_vec_index_to_cond_assign.cpp
+++ b/src/glsl/lower_vec_index_to_cond_assign.cpp
@@ -171,21 +171,23 @@ ir_vec_index_to_cond_assign_visitor::visit_leave(ir_assignment *ir)
 
    assert(orig_deref->array_index->type->base_type == GLSL_TYPE_INT);
 
+   exec_list list;
+
    /* Store the index to a temporary to avoid reusing its tree. */
    index = new(ir) ir_variable(glsl_type::int_type, "vec_index_tmp_i",
 			       ir_var_temporary);
-   ir->insert_before(index);
+   list.push_tail(index);
    deref = new(ir) ir_dereference_variable(index);
    assign = new(ir) ir_assignment(deref, orig_deref->array_index, NULL);
-   ir->insert_before(assign);
+   list.push_tail(assign);
 
    /* Store the RHS to a temporary to avoid reusing its tree. */
    var = new(ir) ir_variable(ir->rhs->type, "vec_index_tmp_v",
 			     ir_var_temporary);
-   ir->insert_before(var);
+   list.push_tail(var);
    deref = new(ir) ir_dereference_variable(var);
    assign = new(ir) ir_assignment(deref, ir->rhs, NULL);
-   ir->insert_before(assign);
+   list.push_tail(assign);
 
    /* Generate a conditional move of each vector element to the temp. */
    for (i = 0; i < orig_deref->array->type->vector_elements; i++) {
@@ -205,8 +207,25 @@ ir_vec_index_to_cond_assign_visitor::visit_leave(ir_assignment *ir)
 
       deref = new(ir) ir_dereference_variable(var);
       assign = new(ir) ir_assignment(swizzle, deref, condition);
-      ir->insert_before(assign);
+      list.push_tail(assign);
    }
+
+   /* If the original assignment has a condition, respect that original
+    * condition!  This is acomplished by wrapping the new conditional
+    * assignments in an if-statement that uses the original condition.
+    */
+   if (ir->condition != NULL) {
+      /* No need to clone the condition because the IR that it hangs on is
+       * going to be removed from the instruction sequence.
+       */
+      ir_if *if_stmt = new(mem_ctx) ir_if(ir->condition);
+
+      list.move_nodes_to(&if_stmt->then_instructions);
+      ir->insert_before(if_stmt);
+   } else {
+      ir->insert_before(&list);
+   }
+
    ir->remove();
 
    this->progress = true;

From d6e1a8f71437d4a65e65f93271b2892dd62b0d23 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 17 Jul 2011 23:15:54 -0700
Subject: [PATCH 060/600] ir_to_mesa: Add each relative address to the previous

This fixes many cases of accessing arrays of matrices using
non-constant indices at each level.

Fixes i965 piglit:

    vs-temp-array-mat[234]-index-col-rd
    vs-temp-array-mat[234]-index-col-row-rd
    vs-temp-array-mat[234]-index-col-wr
    vs-uniform-array-mat[234]-index-col-rd

Fixes swrast piglit:

    fs-temp-array-mat[234]-index-col-rd
    fs-temp-array-mat[234]-index-col-row-rd
    fs-temp-array-mat[234]-index-col-wr
    fs-uniform-array-mat[234]-index-col-rd
    fs-uniform-array-mat[234]-index-col-row-rd
    fs-varying-array-mat[234]-index-col-rd
    fs-varying-array-mat[234]-index-col-row-rd
    vs-temp-array-mat[234]-index-col-rd
    vs-temp-array-mat[234]-index-col-row-rd
    vs-temp-array-mat[234]-index-col-wr
    vs-uniform-array-mat[234]-index-col-rd
    vs-uniform-array-mat[234]-index-col-row-rd
    vs-varying-array-mat[234]-index-col-rd
    vs-varying-array-mat[234]-index-col-row-rd
    vs-varying-array-mat[234]-index-col-wr

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/ir_to_mesa.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index d8e5a3a9772..beb481b3a3b 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1496,6 +1496,18 @@ ir_to_mesa_visitor::visit(ir_dereference_array *ir)
 	      this->result, src_reg_for_float(element_size));
       }
 
+      /* If there was already a relative address register involved, add the
+       * new and the old together to get the new offset.
+       */
+      if (src.reladdr != NULL)  {
+	 src_reg accum_reg = get_temp(glsl_type::float_type);
+
+	 emit(ir, OPCODE_ADD, dst_reg(accum_reg),
+	      index_reg, *src.reladdr);
+
+	 index_reg = accum_reg;
+      }
+
       src.reladdr = ralloc(mem_ctx, src_reg);
       memcpy(src.reladdr, &index_reg, sizeof(index_reg));
    }

From f7cd9a858c043e609fcdbf9ac9dfc1ef7ad002bf Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 17 Jul 2011 23:35:26 -0700
Subject: [PATCH 061/600] ir_to_mesa: Copy reladdr in src_reg(dst_reg)
 constructor

Fixes i965 piglit:

    vs-temp-array-mat[234]-col-row-wr
    vs-temp-array-mat[234]-index-col-row-wr
    vs-temp-array-mat[234]-index-row-wr
    vs-temp-mat[234]-col-row-wr

Fixes swrast piglit:

    fs-temp-array-mat[234]-col-row-wr
    fs-temp-array-mat[234]-index-col-row-wr
    fs-temp-array-mat[234]-index-row-wr
    fs-temp-mat[234]-col-row-wr
    vs-temp-array-mat[234]-col-row-wr
    vs-temp-array-mat[234]-index-col-row-wr
    vs-temp-array-mat[234]-index-row-wr
    vs-temp-mat[234]-col-row-wr

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/ir_to_mesa.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index beb481b3a3b..8b4a535b75f 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -134,7 +134,7 @@ src_reg::src_reg(dst_reg reg)
    this->index = reg.index;
    this->swizzle = SWIZZLE_XYZW;
    this->negate = 0;
-   this->reladdr = NULL;
+   this->reladdr = reg.reladdr;
 }
 
 dst_reg::dst_reg(src_reg reg)

From fbeb68e880318808f90c779cd3f8b8c4160eecf8 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 20 Jul 2011 18:02:17 -0700
Subject: [PATCH 062/600] prog_optimize: Set unused regs to PROGRAM_UNDEFINED
 after CMP->MOV conversion

Leaving the unused registers with other values caused assertion
failures and other problems in places that blindly iterate over all
sources.

brw_vs_emit.c:1381: get_src_reg: Assertion `c->regs[file][index].nr !=
0' failed.

Fixes i965 piglit:

    vs-uniform-array-mat[234]-col-row-rd
    vs-uniform-array-mat[234]-index-col-row-rd
    vs-uniform-array-mat[234]-index-row-rd
    vs-uniform-mat[234]-col-row-rd

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/prog_optimize.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c
index 8a40fa69eca..f4a7a638d5f 100644
--- a/src/mesa/program/prog_optimize.c
+++ b/src/mesa/program/prog_optimize.c
@@ -1319,6 +1319,15 @@ _mesa_simplify_cmp(struct gl_program * program)
 
          inst->Opcode = OPCODE_MOV;
          inst->SrcReg[0] = inst->SrcReg[1];
+
+	 /* Unused operands are expected to have the file set to
+	  * PROGRAM_UNDEFINED.  This is how _mesa_init_instructions initializes
+	  * all of the sources.
+	  */
+	 inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+	 inst->SrcReg[2].File = PROGRAM_UNDEFINED;
+	 inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
       }
    }
    if (dbg) {

From 337e2dfad0bcd567755272271abd2593a1d0fd1f Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 20 Jul 2011 16:04:17 -0700
Subject: [PATCH 063/600] i965: When emitting a src/dst write of an output,
 keep the write mask

Fixes i965 piglit:

    vs-varying-array-mat[234]-col-row-wr
    vs-varying-array-mat[234]-index-col-row-wr
    vs-varying-array-mat[234]-index-row-wr
    vs-varying-array-mat[234]-row-wr
    vs-varying-mat[234]-col-row-wr
    vs-varying-mat[234]-row-wr

Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_vs_emit.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 9d733344a26..5ef8b0720ba 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1993,7 +1993,11 @@ void brw_vs_emit(struct brw_vs_compile *c )
       index = inst->DstReg.Index;
       file = inst->DstReg.File;
       if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
-	  dst = c->output_regs[index].reg;
+	 /* Can't just make get_dst "do the right thing" here because other
+	  * callers of get_dst don't expect any special behavior for the
+	  * c->output_regs[index].used_in_src case.
+	  */
+	 dst = brw_writemask(c->output_regs[index].reg, inst->DstReg.WriteMask);
       else
 	  dst = get_dst(c, inst->DstReg);
 

From 1d3f09f15998c60326bf6c53a8d32c82496264ae Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 20 Jul 2011 18:07:50 -0700
Subject: [PATCH 064/600] i965: When emitting a src/dst read of an output, keep
 the swizzle and neg

Fixes i965 piglit vs-varying-array-mat[234]-row-rd.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i965/brw_vs_emit.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 5ef8b0720ba..d8cb0f7cb69 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1980,9 +1980,22 @@ void brw_vs_emit(struct brw_vs_compile *c )
 	      const struct prog_src_register *src = &inst->SrcReg[i];
 	      index = src->Index;
 	      file = src->File;	
-	      if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
-		  args[i] = c->output_regs[index].reg;
-	      else
+	      if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) {
+		 /* Can't just make get_arg "do the right thing" here because
+		  * other callers of get_arg and get_src_reg don't expect any
+		  * special behavior for the c->output_regs[index].used_in_src
+		  * case.
+		  */
+		 args[i] = c->output_regs[index].reg;
+		 args[i].dw1.bits.swizzle =
+		    BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0),
+				 GET_SWZ(src->Swizzle, 1),
+				 GET_SWZ(src->Swizzle, 2),
+				 GET_SWZ(src->Swizzle, 3));
+
+		 /* Note this is ok for non-swizzle ARB_vp instructions */
+		 args[i].negate = src->Negate ? 1 : 0;
+	      } else
                   args[i] = get_arg(c, inst, i);
 	  }
 

From 156f85336f80d542569f0b0182bd27c7f3218e70 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 18 Jul 2011 16:25:33 -0700
Subject: [PATCH 065/600] glsl: Treat ir_dereference_array of non-var as a
 constant for lowering

Previously the code would just look at deref->array->type to see if it
was a constant.  This isn't good enough because deref->array might be
another ir_dereference_array... of a constant.  As a result,
deref->array->type wouldn't be a constant, but
deref->variable_referenced() would return NULL.  The unchecked NULL
pointer would shortly lead to a segfault.

Instead just look at the return of deref->variable_referenced().  If
it's NULL, assume that either a constant or some other form of
anonymous temporary storage is being dereferenced.

This is a bit hinkey because most drivers treat constant arrays as
uniforms, but the lowering pass treats them as temporaries.  This
keeps the behavior of the old code, so this change isn't making things
worse.

Fixes i965 piglit:

    vs-temp-array-mat[234]-index-col-rd
    vs-temp-array-mat[234]-index-col-row-rd
    vs-uniform-array-mat[234]-index-col-rd
    vs-uniform-array-mat[234]-index-col-row-rd

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/glsl/lower_variable_index_to_cond_assign.cpp | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp
index e08ec13456b..79fa58ec8d5 100644
--- a/src/glsl/lower_variable_index_to_cond_assign.cpp
+++ b/src/glsl/lower_variable_index_to_cond_assign.cpp
@@ -321,10 +321,16 @@ public:
 
    bool storage_type_needs_lowering(ir_dereference_array *deref) const
    {
-      if (deref->array->ir_type == ir_type_constant)
+      /* If a variable isn't eventually the target of this dereference, then
+       * it must be a constant or some sort of anonymous temporary storage.
+       *
+       * FINISHME: Is this correct?  Most drivers treat arrays of constants as
+       * FINISHME: uniforms.  It seems like this should do the same.
+       */
+      const ir_variable *const var = deref->array->variable_referenced();
+      if (var == NULL)
 	 return this->lower_temps;
 
-      const ir_variable *const var = deref->array->variable_referenced();
       switch (var->mode) {
       case ir_var_auto:
       case ir_var_temporary:

From 90cc372400e1f5869baabd841823bbf9296d7b01 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 18 Jul 2011 18:48:39 -0700
Subject: [PATCH 066/600] glsl: Factor out code that generates block of index
 comparisons

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/glsl/ir_optimization.h                    |   4 +
 .../lower_variable_index_to_cond_assign.cpp   | 111 +++++++++++-------
 2 files changed, 73 insertions(+), 42 deletions(-)

diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 59a040751d9..f7808bdda9a 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -69,3 +69,7 @@ bool lower_variable_index_to_cond_assign(exec_list *instructions,
     bool lower_input, bool lower_output, bool lower_temp, bool lower_uniform);
 bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz);
 bool optimize_redundant_jumps(exec_list *instructions);
+
+ir_rvalue *
+compare_index_block(exec_list *instructions, ir_variable *index,
+		    unsigned base, unsigned components, void *mem_ctx);
diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp
index 79fa58ec8d5..7792e6e624f 100644
--- a/src/glsl/lower_variable_index_to_cond_assign.cpp
+++ b/src/glsl/lower_variable_index_to_cond_assign.cpp
@@ -52,6 +52,70 @@
 #include "glsl_types.h"
 #include "main/macros.h"
 
+/**
+ * Generate a comparison value for a block of indices
+ *
+ * Lowering passes for non-constant indexing of arrays, matrices, or vectors
+ * can use this to generate blocks of index comparison values.
+ *
+ * \param instructions  List where new instructions will be appended
+ * \param index         \c ir_variable containing the desired index
+ * \param base          Base value for this block of comparisons
+ * \param components    Number of unique index values to compare.  This must
+ *                      be on the range [1, 4].
+ * \param mem_ctx       ralloc memory context to be used for all allocations.
+ *
+ * \returns
+ * An \c ir_rvalue that \b must be cloned for each use in conditional
+ * assignments, etc.
+ */
+ir_rvalue *
+compare_index_block(exec_list *instructions, ir_variable *index,
+		    unsigned base, unsigned components, void *mem_ctx)
+{
+   ir_rvalue *broadcast_index = new(mem_ctx) ir_dereference_variable(index);
+
+   assert(index->type->is_scalar());
+   assert(index->type->base_type == GLSL_TYPE_INT);
+   assert(components >= 1 && components <= 4);
+
+   if (components > 1) {
+      const ir_swizzle_mask m = { 0, 0, 0, 0, components, false };
+      broadcast_index = new(mem_ctx) ir_swizzle(broadcast_index, m);
+   }
+
+   /* Compare the desired index value with the next block of four indices.
+    */
+   ir_constant_data test_indices_data;
+   memset(&test_indices_data, 0, sizeof(test_indices_data));
+   test_indices_data.i[0] = base;
+   test_indices_data.i[1] = base + 1;
+   test_indices_data.i[2] = base + 2;
+   test_indices_data.i[3] = base + 3;
+
+   ir_constant *const test_indices =
+      new(mem_ctx) ir_constant(broadcast_index->type,
+			       &test_indices_data);
+
+   ir_rvalue *const condition_val =
+      new(mem_ctx) ir_expression(ir_binop_equal,
+				 &glsl_type::bool_type[components - 1],
+				 broadcast_index,
+				 test_indices);
+
+   ir_variable *const condition =
+      new(mem_ctx) ir_variable(condition_val->type,
+			       "dereference_condition",
+			       ir_var_temporary);
+   instructions->push_tail(condition);
+
+   ir_rvalue *const cond_deref =
+      new(mem_ctx) ir_dereference_variable(condition);
+   instructions->push_tail(new(mem_ctx) ir_assignment(cond_deref, condition_val, 0));
+
+   return cond_deref;
+}
+
 static inline bool
 is_array_or_matrix(const ir_instruction *ir)
 {
@@ -204,54 +268,17 @@ struct switch_generator
       for (unsigned i = first; i < end; i += 4) {
          const unsigned comps = MIN2(condition_components, end - i);
 
-         ir_rvalue *broadcast_index =
-	    new(this->mem_ctx) ir_dereference_variable(index);
-
-         if (comps) {
-	    const ir_swizzle_mask m = { 0, 0, 0, 0, comps, false };
-	    broadcast_index = new(this->mem_ctx) ir_swizzle(broadcast_index, m);
-	 }
-
-	 /* Compare the desired index value with the next block of four indices.
-	  */
-         ir_constant_data test_indices_data;
-         memset(&test_indices_data, 0, sizeof(test_indices_data));
-         test_indices_data.i[0] = i;
-         test_indices_data.i[1] = i + 1;
-         test_indices_data.i[2] = i + 2;
-         test_indices_data.i[3] = i + 3;
-         ir_constant *const test_indices =
-	    new(this->mem_ctx) ir_constant(broadcast_index->type,
-					   &test_indices_data);
-
-         ir_rvalue *const condition_val =
-	    new(this->mem_ctx) ir_expression(ir_binop_equal,
-					     &glsl_type::bool_type[comps - 1],
-					     broadcast_index,
-					     test_indices);
-
-         ir_variable *const condition =
-	    new(this->mem_ctx) ir_variable(condition_val->type,
-					   "dereference_array_condition",
-					   ir_var_temporary);
-         list->push_tail(condition);
-
 	 ir_rvalue *const cond_deref =
-	    new(this->mem_ctx) ir_dereference_variable(condition);
-         list->push_tail(new(this->mem_ctx) ir_assignment(cond_deref,
-							  condition_val, 0));
+	    compare_index_block(list, index, i, comps, this->mem_ctx);
 
          if (comps == 1) {
-	    ir_rvalue *const cond_deref =
-	       new(this->mem_ctx) ir_dereference_variable(condition);
-
-            this->generator.generate(i, cond_deref, list);
+            this->generator.generate(i, cond_deref->clone(this->mem_ctx, NULL),
+				     list);
          } else {
             for (unsigned j = 0; j < comps; j++) {
-	       ir_rvalue *const cond_deref =
-		  new(this->mem_ctx) ir_dereference_variable(condition);
 	       ir_rvalue *const cond_swiz =
-		  new(this->mem_ctx) ir_swizzle(cond_deref, j, 0, 0, 0, 1);
+		  new(this->mem_ctx) ir_swizzle(cond_deref->clone(this->mem_ctx, NULL),
+						j, 0, 0, 0, 1);
 
                this->generator.generate(i + j, cond_swiz, list);
             }

From 6c8f1f483a999005cae1da5b54cc8ca1904e7ce7 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 18 Jul 2011 18:51:25 -0700
Subject: [PATCH 067/600] glsl: Compare vector indices in blocks

Just like the non-constant array index lowering pass, compare all N
indices at once.  For accesses to a vec4, this saves 3 comparison
instructions on a vector architecture.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/glsl/lower_vec_index_to_cond_assign.cpp | 62 +++++++++++++--------
 1 file changed, 39 insertions(+), 23 deletions(-)

diff --git a/src/glsl/lower_vec_index_to_cond_assign.cpp b/src/glsl/lower_vec_index_to_cond_assign.cpp
index 15992e27288..fce9c3424a1 100644
--- a/src/glsl/lower_vec_index_to_cond_assign.cpp
+++ b/src/glsl/lower_vec_index_to_cond_assign.cpp
@@ -71,8 +71,6 @@ ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(ir_rvalue
    ir_assignment *assign;
    ir_variable *index, *var;
    ir_dereference *deref;
-   ir_expression *condition;
-   ir_swizzle *swizzle;
    int i;
 
    if (!orig_deref)
@@ -86,39 +84,52 @@ ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(ir_rvalue
 
    assert(orig_deref->array_index->type->base_type == GLSL_TYPE_INT);
 
+   exec_list list;
+
    /* Store the index to a temporary to avoid reusing its tree. */
    index = new(base_ir) ir_variable(glsl_type::int_type,
 				    "vec_index_tmp_i",
 				    ir_var_temporary);
-   base_ir->insert_before(index);
+   list.push_tail(index);
    deref = new(base_ir) ir_dereference_variable(index);
    assign = new(base_ir) ir_assignment(deref, orig_deref->array_index, NULL);
-   base_ir->insert_before(assign);
+   list.push_tail(assign);
 
    /* Temporary where we store whichever value we swizzle out. */
    var = new(base_ir) ir_variable(ir->type, "vec_index_tmp_v",
 				  ir_var_temporary);
-   base_ir->insert_before(var);
+   list.push_tail(var);
+
+   /* Generate a single comparison condition "mask" for all of the components
+    * in the vector.
+    */
+   ir_rvalue *const cond_deref =
+      compare_index_block(&list, index, 0,
+			  orig_deref->array->type->vector_elements,
+			  mem_ctx);
 
    /* Generate a conditional move of each vector element to the temp. */
    for (i = 0; i < orig_deref->array->type->vector_elements; i++) {
-      deref = new(base_ir) ir_dereference_variable(index);
-      condition = new(base_ir) ir_expression(ir_binop_equal,
-					     glsl_type::bool_type,
-					     deref,
-					     new(base_ir) ir_constant(i));
+      ir_rvalue *condition_swizzle =
+	 new(base_ir) ir_swizzle(cond_deref->clone(ir, NULL), i, 0, 0, 0, 1);
 
       /* Just clone the rest of the deref chain when trying to get at the
        * underlying variable.
        */
-      swizzle = new(base_ir) ir_swizzle(orig_deref->array->clone(mem_ctx, NULL),
-					i, 0, 0, 0, 1);
+      ir_rvalue *swizzle =
+	 new(base_ir) ir_swizzle(orig_deref->array->clone(mem_ctx, NULL),
+				 i, 0, 0, 0, 1);
 
       deref = new(base_ir) ir_dereference_variable(var);
-      assign = new(base_ir) ir_assignment(deref, swizzle, condition);
-      base_ir->insert_before(assign);
+      assign = new(base_ir) ir_assignment(deref, swizzle, condition_swizzle);
+      list.push_tail(assign);
    }
 
+   /* Put all of the new instructions in the IR stream before the old
+    * instruction.
+    */
+   base_ir->insert_before(&list);
+
    this->progress = true;
    return new(base_ir) ir_dereference_variable(var);
 }
@@ -189,24 +200,29 @@ ir_vec_index_to_cond_assign_visitor::visit_leave(ir_assignment *ir)
    assign = new(ir) ir_assignment(deref, ir->rhs, NULL);
    list.push_tail(assign);
 
+   /* Generate a single comparison condition "mask" for all of the components
+    * in the vector.
+    */
+   ir_rvalue *const cond_deref =
+      compare_index_block(&list, index, 0,
+			  orig_deref->array->type->vector_elements,
+			  mem_ctx);
+
    /* Generate a conditional move of each vector element to the temp. */
    for (i = 0; i < orig_deref->array->type->vector_elements; i++) {
-      ir_rvalue *condition, *swizzle;
+      ir_rvalue *condition_swizzle =
+	 new(ir) ir_swizzle(cond_deref->clone(ir, NULL), i, 0, 0, 0, 1);
 
-      deref = new(ir) ir_dereference_variable(index);
-      condition = new(ir) ir_expression(ir_binop_equal,
-					glsl_type::bool_type,
-					deref,
-					new(ir) ir_constant(i));
 
       /* Just clone the rest of the deref chain when trying to get at the
        * underlying variable.
        */
-      swizzle = new(ir) ir_swizzle(orig_deref->array->clone(mem_ctx, NULL),
-				   i, 0, 0, 0, 1);
+      ir_rvalue *swizzle =
+	 new(ir) ir_swizzle(orig_deref->array->clone(mem_ctx, NULL),
+			    i, 0, 0, 0, 1);
 
       deref = new(ir) ir_dereference_variable(var);
-      assign = new(ir) ir_assignment(swizzle, deref, condition);
+      assign = new(ir) ir_assignment(swizzle, deref, condition_swizzle);
       list.push_tail(assign);
    }
 

From ac6455e9a25f5472c96d580e3d2389f1ed1c0619 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Thu, 21 Jul 2011 20:10:34 -0400
Subject: [PATCH 068/600] gallium/softpipe: Don't clobber dest color/alpha
 before masking.

The blend_quad function clobbers the actual render target color/alpha
values while applying the destination blend factor, which results in
restoring the wrong value during the masking stage for write-disabled
channels.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/softpipe/sp_quad_blend.c | 185 ++++++++++---------
 1 file changed, 96 insertions(+), 89 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c
index 04bfd14b7c6..82f9785e32a 100644
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -240,6 +240,7 @@ blend_quad(struct quad_stage *qs,
    static const float one[4] = { 1, 1, 1, 1 };
    struct softpipe_context *softpipe = qs->softpipe;
    float source[4][QUAD_SIZE] = { { 0 } };
+   float blend_dest[4][QUAD_SIZE];
 
    /*
     * Compute src/first term RGB
@@ -480,79 +481,85 @@ blend_quad(struct quad_stage *qs,
       assert(0 && "invalid alpha src factor");
    }
 
+   /* Save the original dest for use in masking */
+   VEC4_COPY(blend_dest[0], dest[0]);
+   VEC4_COPY(blend_dest[1], dest[1]);
+   VEC4_COPY(blend_dest[2], dest[2]);
+   VEC4_COPY(blend_dest[3], dest[3]);
+
 
    /*
-    * Compute dest/second term RGB
+    * Compute blend_dest/second term RGB
     */
    switch (softpipe->blend->rt[blend_index].rgb_dst_factor) {
    case PIPE_BLENDFACTOR_ONE:
-      /* dest = dest * 1   NO-OP, leave dest as-is */
+      /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
       break;
    case PIPE_BLENDFACTOR_SRC_COLOR:
-      VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */
-      VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */
-      VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */
+      VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[0]); /* R */
+      VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[1]); /* G */
+      VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[2]); /* B */
       break;
    case PIPE_BLENDFACTOR_SRC_ALPHA:
-      VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */
-      VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */
-      VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */
+      VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[3]); /* R * A */
+      VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[3]); /* G * A */
+      VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[3]); /* B * A */
       break;
    case PIPE_BLENDFACTOR_DST_ALPHA:
       if (has_dst_alpha) {
-         VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */
-         VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */
-         VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */
+         VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[3]); /* R * A */
+         VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[3]); /* G * A */
+         VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[3]); /* B * A */
       }
       else {
-         /* dest = dest * 1   NO-OP, leave dest as-is */
+         /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
       }
       break;
    case PIPE_BLENDFACTOR_DST_COLOR:
-      VEC4_MUL(dest[0], dest[0], dest[0]); /* R */
-      VEC4_MUL(dest[1], dest[1], dest[1]); /* G */
-      VEC4_MUL(dest[2], dest[2], dest[2]); /* B */
+      VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[0]); /* R */
+      VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[1]); /* G */
+      VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[2]); /* B */
       break;
    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
       if (has_dst_alpha) {
          const float *alpha = quadColor[3];
          float diff[4], temp[4];
-         VEC4_SUB(diff, one, dest[3]);
+         VEC4_SUB(diff, one, blend_dest[3]);
          VEC4_MIN(temp, alpha, diff);
-         VEC4_MUL(dest[0], quadColor[0], temp); /* R */
-         VEC4_MUL(dest[1], quadColor[1], temp); /* G */
-         VEC4_MUL(dest[2], quadColor[2], temp); /* B */
+         VEC4_MUL(blend_dest[0], quadColor[0], temp); /* R */
+         VEC4_MUL(blend_dest[1], quadColor[1], temp); /* G */
+         VEC4_MUL(blend_dest[2], quadColor[2], temp); /* B */
       }
       else {
-         VEC4_COPY(dest[0], zero); /* R */
-         VEC4_COPY(dest[1], zero); /* G */
-         VEC4_COPY(dest[2], zero); /* B */
+         VEC4_COPY(blend_dest[0], zero); /* R */
+         VEC4_COPY(blend_dest[1], zero); /* G */
+         VEC4_COPY(blend_dest[2], zero); /* B */
       }
       break;
    case PIPE_BLENDFACTOR_CONST_COLOR:
    {
       float comp[4];
       VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */
-      VEC4_MUL(dest[0], dest[0], comp); /* R */
+      VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
       VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */
-      VEC4_MUL(dest[1], dest[1], comp); /* G */
+      VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
       VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */
-      VEC4_MUL(dest[2], dest[2], comp); /* B */
+      VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
    }
    break;
    case PIPE_BLENDFACTOR_CONST_ALPHA:
    {
       float comp[4];
       VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
-      VEC4_MUL(dest[0], dest[0], comp); /* R */
-      VEC4_MUL(dest[1], dest[1], comp); /* G */
-      VEC4_MUL(dest[2], dest[2], comp); /* B */
+      VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
+      VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
+      VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
    }
    break;
    case PIPE_BLENDFACTOR_ZERO:
-      VEC4_COPY(dest[0], zero); /* R */
-      VEC4_COPY(dest[1], zero); /* G */
-      VEC4_COPY(dest[2], zero); /* B */
+      VEC4_COPY(blend_dest[0], zero); /* R */
+      VEC4_COPY(blend_dest[1], zero); /* G */
+      VEC4_COPY(blend_dest[2], zero); /* B */
       break;
    case PIPE_BLENDFACTOR_SRC1_COLOR:
    case PIPE_BLENDFACTOR_SRC1_ALPHA:
@@ -563,45 +570,45 @@ blend_quad(struct quad_stage *qs,
    {
       float inv_comp[4];
       VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
-      VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */
+      VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
       VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
-      VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */
+      VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
       VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
-      VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
+      VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
    }
    break;
    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
    {
       float one_minus_alpha[QUAD_SIZE];
       VEC4_SUB(one_minus_alpha, one, quadColor[3]);
-      VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
-      VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
-      VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
+      VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
+      VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
+      VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
    }
    break;
    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
       if (has_dst_alpha) {
          float inv_comp[4];
-         VEC4_SUB(inv_comp, one, dest[3]); /* A */
-         VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */
-         VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */
-         VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */
+         VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
+         VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
+         VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
+         VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
       }
       else {
-         VEC4_COPY(dest[0], zero); /* R */
-         VEC4_COPY(dest[1], zero); /* G */
-         VEC4_COPY(dest[2], zero); /* B */
+         VEC4_COPY(blend_dest[0], zero); /* R */
+         VEC4_COPY(blend_dest[1], zero); /* G */
+         VEC4_COPY(blend_dest[2], zero); /* B */
       }
    break;
    case PIPE_BLENDFACTOR_INV_DST_COLOR:
    {
       float inv_comp[4];
-      VEC4_SUB(inv_comp, one, dest[0]); /* R */
-      VEC4_MUL(dest[0], dest[0], inv_comp); /* R */
-      VEC4_SUB(inv_comp, one, dest[1]); /* G */
-      VEC4_MUL(dest[1], dest[1], inv_comp); /* G */
-      VEC4_SUB(inv_comp, one, dest[2]); /* B */
-      VEC4_MUL(dest[2], dest[2], inv_comp); /* B */
+      VEC4_SUB(inv_comp, one, blend_dest[0]); /* R */
+      VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* R */
+      VEC4_SUB(inv_comp, one, blend_dest[1]); /* G */
+      VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* G */
+      VEC4_SUB(inv_comp, one, blend_dest[2]); /* B */
+      VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); /* B */
    }
    break;
    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
@@ -609,22 +616,22 @@ blend_quad(struct quad_stage *qs,
       float inv_comp[4];
       /* R */
       VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]);
-      VEC4_MUL(dest[0], dest[0], inv_comp);
+      VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
       /* G */
       VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]);
-      VEC4_MUL(dest[1], dest[1], inv_comp);
+      VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
       /* B */
       VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]);
-      VEC4_MUL(dest[2], dest[2], inv_comp);
+      VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
    }
    break;
    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
    {
       float inv_comp[4];
       VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
-      VEC4_MUL(dest[0], dest[0], inv_comp);
-      VEC4_MUL(dest[1], dest[1], inv_comp);
-      VEC4_MUL(dest[2], dest[2], inv_comp);
+      VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
+      VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
+      VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
    }
    break;
    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
@@ -637,29 +644,29 @@ blend_quad(struct quad_stage *qs,
    }
 
    /*
-    * Compute dest/second term A
+    * Compute blend_dest/second term A
     */
    switch (softpipe->blend->rt[blend_index].alpha_dst_factor) {
    case PIPE_BLENDFACTOR_ONE:
-      /* dest = dest * 1   NO-OP, leave dest as-is */
+      /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
       break;
    case PIPE_BLENDFACTOR_SRC_COLOR:
       /* fall-through */
    case PIPE_BLENDFACTOR_SRC_ALPHA:
-      VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */
+      VEC4_MUL(blend_dest[3], blend_dest[3], quadColor[3]); /* A * A */
       break;
    case PIPE_BLENDFACTOR_DST_COLOR:
       /* fall-through */
    case PIPE_BLENDFACTOR_DST_ALPHA:
       if (has_dst_alpha) {
-         VEC4_MUL(dest[3], dest[3], dest[3]); /* A */
+         VEC4_MUL(blend_dest[3], blend_dest[3], blend_dest[3]); /* A */
       }
       else {
-         /* dest = dest * 1   NO-OP, leave dest as-is */
+         /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
       }
       break;
    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-      /* dest = dest * 1   NO-OP, leave dest as-is */
+      /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
       break;
    case PIPE_BLENDFACTOR_CONST_COLOR:
       /* fall-through */
@@ -667,11 +674,11 @@ blend_quad(struct quad_stage *qs,
    {
       float comp[4];
       VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */
-      VEC4_MUL(dest[3], dest[3], comp); /* A */
+      VEC4_MUL(blend_dest[3], blend_dest[3], comp); /* A */
    }
    break;
    case PIPE_BLENDFACTOR_ZERO:
-      VEC4_COPY(dest[3], zero); /* A */
+      VEC4_COPY(blend_dest[3], zero); /* A */
       break;
    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
       /* fall-through */
@@ -679,7 +686,7 @@ blend_quad(struct quad_stage *qs,
    {
       float one_minus_alpha[QUAD_SIZE];
       VEC4_SUB(one_minus_alpha, one, quadColor[3]);
-      VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
+      VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
    }
    break;
    case PIPE_BLENDFACTOR_INV_DST_COLOR:
@@ -687,11 +694,11 @@ blend_quad(struct quad_stage *qs,
    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
       if (has_dst_alpha) {
          float inv_comp[4];
-         VEC4_SUB(inv_comp, one, dest[3]); /* A */
-         VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */
+         VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
+         VEC4_MUL(blend_dest[3], inv_comp, blend_dest[3]); /* A */
       }
       else {
-         VEC4_COPY(dest[3], zero); /* A */
+         VEC4_COPY(blend_dest[3], zero); /* A */
       }
       break;
    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
@@ -700,7 +707,7 @@ blend_quad(struct quad_stage *qs,
    {
       float inv_comp[4];
       VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]);
-      VEC4_MUL(dest[3], dest[3], inv_comp);
+      VEC4_MUL(blend_dest[3], blend_dest[3], inv_comp);
    }
    break;
    default:
@@ -712,29 +719,29 @@ blend_quad(struct quad_stage *qs,
     */
    switch (softpipe->blend->rt[blend_index].rgb_func) {
    case PIPE_BLEND_ADD:
-      VEC4_ADD_SAT(quadColor[0], source[0], dest[0]); /* R */
-      VEC4_ADD_SAT(quadColor[1], source[1], dest[1]); /* G */
-      VEC4_ADD_SAT(quadColor[2], source[2], dest[2]); /* B */
+      VEC4_ADD_SAT(quadColor[0], source[0], blend_dest[0]); /* R */
+      VEC4_ADD_SAT(quadColor[1], source[1], blend_dest[1]); /* G */
+      VEC4_ADD_SAT(quadColor[2], source[2], blend_dest[2]); /* B */
       break;
    case PIPE_BLEND_SUBTRACT:
-      VEC4_SUB_SAT(quadColor[0], source[0], dest[0]); /* R */
-      VEC4_SUB_SAT(quadColor[1], source[1], dest[1]); /* G */
-      VEC4_SUB_SAT(quadColor[2], source[2], dest[2]); /* B */
+      VEC4_SUB_SAT(quadColor[0], source[0], blend_dest[0]); /* R */
+      VEC4_SUB_SAT(quadColor[1], source[1], blend_dest[1]); /* G */
+      VEC4_SUB_SAT(quadColor[2], source[2], blend_dest[2]); /* B */
       break;
    case PIPE_BLEND_REVERSE_SUBTRACT:
-      VEC4_SUB_SAT(quadColor[0], dest[0], source[0]); /* R */
-      VEC4_SUB_SAT(quadColor[1], dest[1], source[1]); /* G */
-      VEC4_SUB_SAT(quadColor[2], dest[2], source[2]); /* B */
+      VEC4_SUB_SAT(quadColor[0], blend_dest[0], source[0]); /* R */
+      VEC4_SUB_SAT(quadColor[1], blend_dest[1], source[1]); /* G */
+      VEC4_SUB_SAT(quadColor[2], blend_dest[2], source[2]); /* B */
       break;
    case PIPE_BLEND_MIN:
-      VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */
-      VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */
-      VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */
+      VEC4_MIN(quadColor[0], source[0], blend_dest[0]); /* R */
+      VEC4_MIN(quadColor[1], source[1], blend_dest[1]); /* G */
+      VEC4_MIN(quadColor[2], source[2], blend_dest[2]); /* B */
       break;
    case PIPE_BLEND_MAX:
-      VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */
-      VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */
-      VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */
+      VEC4_MAX(quadColor[0], source[0], blend_dest[0]); /* R */
+      VEC4_MAX(quadColor[1], source[1], blend_dest[1]); /* G */
+      VEC4_MAX(quadColor[2], source[2], blend_dest[2]); /* B */
       break;
    default:
       assert(0 && "invalid rgb blend func");
@@ -745,19 +752,19 @@ blend_quad(struct quad_stage *qs,
     */
    switch (softpipe->blend->rt[blend_index].alpha_func) {
    case PIPE_BLEND_ADD:
-      VEC4_ADD_SAT(quadColor[3], source[3], dest[3]); /* A */
+      VEC4_ADD_SAT(quadColor[3], source[3], blend_dest[3]); /* A */
       break;
    case PIPE_BLEND_SUBTRACT:
-      VEC4_SUB_SAT(quadColor[3], source[3], dest[3]); /* A */
+      VEC4_SUB_SAT(quadColor[3], source[3], blend_dest[3]); /* A */
       break;
    case PIPE_BLEND_REVERSE_SUBTRACT:
-      VEC4_SUB_SAT(quadColor[3], dest[3], source[3]); /* A */
+      VEC4_SUB_SAT(quadColor[3], blend_dest[3], source[3]); /* A */
       break;
    case PIPE_BLEND_MIN:
-      VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */
+      VEC4_MIN(quadColor[3], source[3], blend_dest[3]); /* A */
       break;
    case PIPE_BLEND_MAX:
-      VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */
+      VEC4_MAX(quadColor[3], source[3], blend_dest[3]); /* A */
       break;
    default:
       assert(0 && "invalid alpha blend func");

From 4d23c6df81639057f12a604556121aa7b41d921c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <deathsimple@vodafone.de>
Date: Sun, 24 Jul 2011 19:11:34 +0200
Subject: [PATCH 069/600] r600g: use file_max instead of file_count to
 determine reg offset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Otherwise shaders with skipped inputs/outputs doesn't work correctly.

Signed-off-by: Christian König <deathsimple@vodafone.de>
---
 src/gallium/drivers/r600/r600_shader.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 3e21ad1fdc6..494f9370597 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -658,9 +658,9 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
 		ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
 	}
 	ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] +
-						ctx.info.file_count[TGSI_FILE_INPUT];
+						ctx.info.file_max[TGSI_FILE_INPUT] + 1;
 	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
-						ctx.info.file_count[TGSI_FILE_OUTPUT];
+						ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
 
 	/* Outside the GPR range. This will be translated to one of the
 	 * kcache banks later. */
@@ -668,7 +668,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
 
 	ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
 	ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
-			ctx.info.file_count[TGSI_FILE_TEMPORARY];
+			ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
 	ctx.temp_reg = ctx.ar_reg + 1;
 
 	ctx.nliterals = 0;

From 4c84acc86fce5eda0aabcb8aa362fd6b5e6a28f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <deathsimple@vodafone.de>
Date: Mon, 25 Jul 2011 01:32:39 +0200
Subject: [PATCH 070/600] g3dvl: remove unused vs output from
 create_ref_vert_shader

The position of the quad vertex is calculated in calc_position,
so we don't need the output here any more.
---
 src/gallium/auxiliary/vl/vl_mc.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c
index e5ae0f72c4c..0b3723c9792 100644
--- a/src/gallium/auxiliary/vl/vl_mc.c
+++ b/src/gallium/auxiliary/vl/vl_mc.c
@@ -105,7 +105,7 @@ create_ref_vert_shader(struct vl_mc *r)
    struct ureg_src mv_scale;
    struct ureg_src vmv[2];
    struct ureg_dst t_vpos;
-   struct ureg_dst o_vpos, o_vmv[2];
+   struct ureg_dst o_vmv[2];
    unsigned i;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
@@ -120,9 +120,6 @@ create_ref_vert_shader(struct vl_mc *r)
       (float)MACROBLOCK_HEIGHT / r->buffer_height)
    );
 
-   /* XXX The position is not written, which may lead to undefined rendering.
-    * XXX This is a serious bug. */
-   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
    o_vmv[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
    o_vmv[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM);
 

From 42cdf4074e0f7d561b03a86255fa8f916f906bf6 Mon Sep 17 00:00:00 2001
From: Benjamin Franzke <benjaminfranzke@googlemail.com>
Date: Mon, 25 Jul 2011 09:37:02 +0200
Subject: [PATCH 071/600] configure: Move gbm before egl in SRC_DIRS

egl_dri2 built into libEGL depends on libgbm.

Fixes https://bugs.freedesktop.org/show_bug.cgi?id=39515
---
 configure.ac | 70 ++++++++++++++++++++++++++--------------------------
 1 file changed, 35 insertions(+), 35 deletions(-)

diff --git a/configure.ac b/configure.ac
index 86ba87b39e8..0ea264ef042 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1325,6 +1325,41 @@ AC_SUBST([OSMESA_MESA_DEPS])
 AC_SUBST([OSMESA_PC_REQ])
 AC_SUBST([OSMESA_PC_LIB_PRIV])
 
+dnl
+dnl gbm configuration
+dnl
+if test "x$enable_gbm" = xauto; then
+    case "$with_egl_platforms" in
+        *drm*)
+            enable_gbm=yes ;;
+         *)
+            enable_gbm=no ;;
+    esac
+fi
+if test "x$enable_gbm" = xyes; then
+    SRC_DIRS="$SRC_DIRS gbm"
+    GBM_BACKEND_DIRS=""
+
+    PKG_CHECK_MODULES([LIBUDEV], [libudev], [],
+                      AC_MSG_ERROR([gbm needs udev]))
+    GBM_LIB_DEPS="$DLOPEN_LIBS $LIBUDEV_LIBS"
+
+    if test "x$enable_dri" = xyes; then
+        GBM_BACKEND_DIRS="$GBM_BACKEND_DIRS dri"
+        if test "$SHARED_GLAPI" -eq 0; then
+            AC_MSG_ERROR([gbm_dri requires --enable-shared-glapi])
+        fi
+    fi
+fi
+AC_SUBST([GBM_LIB_DEPS])
+AC_SUBST([GBM_BACKEND_DIRS])
+GBM_PC_REQ_PRIV="libudev"
+GBM_PC_LIB_PRIV="$DLOPEN_LIBS"
+GBM_PC_CFLAGS=
+AC_SUBST([GBM_PC_REQ_PRIV])
+AC_SUBST([GBM_PC_LIB_PRIV])
+AC_SUBST([GBM_PC_CFLAGS])
+
 dnl
 dnl EGL configuration
 dnl
@@ -1366,41 +1401,6 @@ fi
 AC_SUBST([EGL_LIB_DEPS])
 AC_SUBST([EGL_DRIVERS_DIRS])
 
-dnl
-dnl gbm configuration
-dnl
-if test "x$enable_gbm" = xauto; then
-    case "$with_egl_platforms" in
-        *drm*)
-            enable_gbm=yes ;;
-         *)
-            enable_gbm=no ;;
-    esac
-fi
-if test "x$enable_gbm" = xyes; then
-    SRC_DIRS="$SRC_DIRS gbm"
-    GBM_BACKEND_DIRS=""
-
-    PKG_CHECK_MODULES([LIBUDEV], [libudev], [],
-                      AC_MSG_ERROR([gbm needs udev]))
-    GBM_LIB_DEPS="$DLOPEN_LIBS $LIBUDEV_LIBS"
-
-    if test "x$enable_dri" = xyes; then
-        GBM_BACKEND_DIRS="$GBM_BACKEND_DIRS dri"
-        if test "$SHARED_GLAPI" -eq 0; then
-            AC_MSG_ERROR([gbm_dri requires --enable-shared-glapi])
-        fi
-    fi
-fi
-AC_SUBST([GBM_LIB_DEPS])
-AC_SUBST([GBM_BACKEND_DIRS])
-GBM_PC_REQ_PRIV="libudev"
-GBM_PC_LIB_PRIV="$DLOPEN_LIBS"
-GBM_PC_CFLAGS=
-AC_SUBST([GBM_PC_REQ_PRIV])
-AC_SUBST([GBM_PC_LIB_PRIV])
-AC_SUBST([GBM_PC_CFLAGS])
-
 dnl
 dnl EGL Gallium configuration
 dnl

From 4f90b89961ea2795f274611266b649803a058026 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <deathsimple@vodafone.de>
Date: Mon, 25 Jul 2011 02:58:31 +0200
Subject: [PATCH 072/600] gallium: change formats merged with pipe-video to
 type "other"

Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=39276
---
 src/gallium/auxiliary/util/u_format.csv | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv
index 347e2beb8dd..a3d2aae62c8 100644
--- a/src/gallium/auxiliary/util/u_format.csv
+++ b/src/gallium/auxiliary/util/u_format.csv
@@ -260,10 +260,10 @@ PIPE_FORMAT_R10G10B10X2_USCALED   , plain, 1, 1, u10 , u10 , u10  , x2 , xyz1, r
 # A.k.a. D3DDECLTYPE_DEC3N
 PIPE_FORMAT_R10G10B10X2_SNORM     , plain, 1, 1, sn10, sn10, sn10 , x2 , xyz1, rgb
 
-PIPE_FORMAT_YV12                  , subsampled, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
-PIPE_FORMAT_YV16                  , subsampled, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
-PIPE_FORMAT_IYUV                  , subsampled, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
-PIPE_FORMAT_NV12                  , subsampled, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
-PIPE_FORMAT_NV21                  , subsampled, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
-PIPE_FORMAT_IA44                  , subsampled, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
-PIPE_FORMAT_AI44                  , subsampled, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
+PIPE_FORMAT_YV12                  , other, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
+PIPE_FORMAT_YV16                  , other, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
+PIPE_FORMAT_IYUV                  , other, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
+PIPE_FORMAT_NV12                  , other, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
+PIPE_FORMAT_NV21                  , other, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
+PIPE_FORMAT_IA44                  , other, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
+PIPE_FORMAT_AI44                  , other, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv

From 7746b7d4bf48b75dd273510e7a6ad6405c91b8bb Mon Sep 17 00:00:00 2001
From: Emeric <emeric.grange@gmail.com>
Date: Mon, 18 Jul 2011 15:17:25 +0000
Subject: [PATCH 073/600] vdpau: enable mpeg1 hw decoding, using the exact same
 code path as mpeg2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=39307

Signed-off-by: Christian König <deathsimple@vodafone.de>
---
 src/gallium/state_trackers/vdpau/decode.c | 25 ++++++++++++-----------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 269c7a4baf8..96542f874d9 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -161,12 +161,12 @@ vlVdpDecoderGetParameters(VdpDecoder decoder,
 }
 
 static VdpStatus
-vlVdpDecoderRenderMpeg2(struct pipe_video_decoder *decoder,
-                        struct pipe_video_decode_buffer *buffer,
-                        struct pipe_video_buffer *target,
-                        VdpPictureInfoMPEG1Or2 *picture_info,
-                        uint32_t bitstream_buffer_count,
-                        VdpBitstreamBuffer const *bitstream_buffers)
+vlVdpDecoderRenderMpeg12(struct pipe_video_decoder *decoder,
+                         struct pipe_video_decode_buffer *buffer,
+                         struct pipe_video_buffer *target,
+                         VdpPictureInfoMPEG1Or2 *picture_info,
+                         uint32_t bitstream_buffer_count,
+                         VdpBitstreamBuffer const *bitstream_buffers)
 {
    struct pipe_mpeg12_picture_desc picture;
    struct pipe_video_buffer *ref_frames[2];
@@ -254,17 +254,18 @@ vlVdpDecoderRender(VdpDecoder decoder,
       // TODO: Recreate decoder with correct chroma
       return VDP_STATUS_INVALID_CHROMA_TYPE;
 
-   // TODO: Right now only mpeg2 is supported.
+   // TODO: Right now only mpeg 1 & 2 is supported.
    switch (vldecoder->decoder->profile)   {
+   case PIPE_VIDEO_PROFILE_MPEG1:
    case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
    case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
       ++vldecoder->cur_buffer;
       vldecoder->cur_buffer %= VL_NUM_DECODE_BUFFERS;
-      return vlVdpDecoderRenderMpeg2(vldecoder->decoder,
-                                     vldecoder->buffer[vldecoder->cur_buffer],
-                                     vlsurf->video_buffer,
-                                     (VdpPictureInfoMPEG1Or2 *)picture_info,
-                                     bitstream_buffer_count,bitstream_buffers);
+      return vlVdpDecoderRenderMpeg12(vldecoder->decoder,
+                                      vldecoder->buffer[vldecoder->cur_buffer],
+                                      vlsurf->video_buffer,
+                                      (VdpPictureInfoMPEG1Or2 *)picture_info,
+                                      bitstream_buffer_count,bitstream_buffers);
       break;
 
    default:

From 156cef0fbacf242e8fc67e39ab964e5f8f3739cb Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 21 Jul 2011 21:17:10 -0700
Subject: [PATCH 074/600] i965/fs: Clear result before visiting shadow
 comparitor and LOD info.

Commit 53c89c67f33639afef951e178f93f4e29acc5d53 ("i965: Avoid generating
MOVs for assignments of expressions.") added the line "this->result =
reg_undef" all over the code.  Unfortunately, since Eric developed his
patch before I landed Ivybridge support, he missed adding it to
fs_visitor::emit_texture_gen7() after rebasing.

Furthermore, since I developed TXD support before Eric's patch, I
neglected to add it to the gradient handling when I rebased.

Neglecting to set this causes the visitor to use this->result as storage
rather than generating a new temporary.  These missing statements
resulted in the same register being used to store several different
values.

Fixes the following piglit tests on Ivybridge:
- glsl-fs-shadow2dproj.shader_test
- glsl-fs-shadow2dproj-bias.shader_test

NOTE: This is a candidate for the 7.11 branch.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index cbe5cf428c5..9632aae64b0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -603,9 +603,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
       mlen += 3;
    } else if (ir->op == ir_txd) {
+      this->result = reg_undef;
       ir->lod_info.grad.dPdx->accept(this);
       fs_reg dPdx = this->result;
 
+      this->result = reg_undef;
       ir->lod_info.grad.dPdy->accept(this);
       fs_reg dPdy = this->result;
 
@@ -786,9 +788,11 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       inst = emit(FS_OPCODE_TXL, dst);
       break;
    case ir_txd: {
+      this->result = reg_undef;
       ir->lod_info.grad.dPdx->accept(this);
       fs_reg dPdx = this->result;
 
+      this->result = reg_undef;
       ir->lod_info.grad.dPdy->accept(this);
       fs_reg dPdy = this->result;
 
@@ -850,6 +854,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    }
 
    if (ir->shadow_comparitor && ir->op != ir_txd) {
+      this->result = reg_undef;
       ir->shadow_comparitor->accept(this);
       emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
       mlen += reg_width;
@@ -860,11 +865,13 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    case ir_tex:
       break;
    case ir_txb:
+      this->result = reg_undef;
       ir->lod_info.bias->accept(this);
       emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
       mlen += reg_width;
       break;
    case ir_txl:
+      this->result = reg_undef;
       ir->lod_info.lod->accept(this);
       emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
       mlen += reg_width;
@@ -873,9 +880,11 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       if (c->dispatch_width == 16)
 	 fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
 
+      this->result = reg_undef;
       ir->lod_info.grad.dPdx->accept(this);
       fs_reg dPdx = this->result;
 
+      this->result = reg_undef;
       ir->lod_info.grad.dPdy->accept(this);
       fs_reg dPdy = this->result;
 
@@ -1070,6 +1079,7 @@ fs_visitor::visit(ir_texture *ir)
       if (hw_compare_supported) {
 	 inst->shadow_compare = true;
       } else {
+	 this->result = reg_undef;
 	 ir->shadow_comparitor->accept(this);
 	 fs_reg ref = this->result;
 

From 572f6318954f31fcf3d396ac5df8e9eff3f37c74 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 12 May 2011 04:02:32 -0700
Subject: [PATCH 075/600] i965/gen7: Fix shadow sampling in the old brw_wm_emit
 backend.

On Ivybridge, the shadow comparitor goes in the first slot, rather than
at the end.  It's not necessary to send u, v, and r.

Fixes tests texturing/texdepth and glean/fbo.

NOTE: This is a candidate for the 7.11 branch.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_wm_emit.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index f61757a8cac..6ea4a7d6e50 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -1094,9 +1094,16 @@ void emit_tex(struct brw_wm_compile *c,
    if (intel->gen < 5 && c->dispatch_width == 8)
       nr_texcoords = 3;
 
-   /* For shadow comparisons, we have to supply u,v,r. */
-   if (shadow)
-      nr_texcoords = 3;
+   if (shadow) {
+      if (intel->gen < 7) {
+	 /* For shadow comparisons, we have to supply u,v,r. */
+	 nr_texcoords = 3;
+      } else {
+	 /* On Ivybridge, the shadow comparitor comes first. Just load it. */
+	 brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
+	 cur_mrf += mrf_per_channel;
+      }
+   }
 
    /* Emit the texcoords. */
    for (i = 0; i < nr_texcoords; i++) {
@@ -1113,7 +1120,7 @@ void emit_tex(struct brw_wm_compile *c,
    }
 
    /* Fill in the shadow comparison reference value. */
-   if (shadow) {
+   if (shadow && intel->gen < 7) {
       if (intel->gen >= 5) {
 	 /* Fill in the cube map array index value. */
 	 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));

From d92463d5dc42aca09a54588c322fc60582cf9131 Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Fri, 22 Jul 2011 14:05:52 -0700
Subject: [PATCH 076/600] i965: vs optimization fix: Check val.{negate,abs} in
 accumulator_contains()

When emitting a MAC instruction in a vertex shader, brw_vs_emit()
calls accumulator_contains() to determine whether the accumulator
already contains the appropriate addend; if it does, then we can avoid
emitting an unnecessary MOV instruction.

However, accumulator_contains() wasn't checking the val.negate or
val.abs flags.  As a result, if the desired value was the negation, or
the absolute value, of what was already in the accumulator, we would
generate an incorrect shader.

Fixes piglit test vs-refract-vec4-vec4-float.

Tested on Gen5 and Gen6.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i965/brw_vs_emit.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index d8cb0f7cb69..674a994bace 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1821,6 +1821,9 @@ accumulator_contains(struct brw_vs_compile *c, struct brw_reg val)
    if (val.address_mode != BRW_ADDRESS_DIRECT)
       return GL_FALSE;
 
+   if (val.negate || val.abs)
+      return GL_FALSE;
+
    switch (prev_insn->header.opcode) {
    case BRW_OPCODE_MOV:
    case BRW_OPCODE_MAC:

From 185868c9c2e6a31a7313df2dbe29490547b65f61 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 25 Jul 2011 11:50:27 -0700
Subject: [PATCH 077/600] i965: Emit texture cache flushes on gen6 along with
 render cache flushes.

It turns out that internally the texture cache gets flushed in a
couple of cases, particularly around 2D operations mixed with 3D.  In
almost all cases one of those happens between rendering to an
FBO-attached texture and rendering from that texture.  However, as of
the next patch, glean tfbo (and the new fbo-flushing-2 test) would
manage to get stale texture values because one of those flushes didn't
occur.  The intention of this code was always to get the render cache
cleared and ready to be used from the sampler cache (and it does on <=
gen4), so this just catches gen5 up.

This patch was also tested to fix fbo-flushing on gen7.
---
 src/mesa/drivers/dri/intel/intel_batchbuffer.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index 97cc219ce6d..db4343be10c 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -390,6 +390,7 @@ intel_batchbuffer_emit_mi_flush(struct intel_context *intel)
 	 OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
 		   PIPE_CONTROL_WRITE_FLUSH |
 		   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+		   PIPE_CONTROL_TC_FLUSH |
 		   PIPE_CONTROL_NO_WRITE);
 	 OUT_BATCH(0); /* write address */
 	 OUT_BATCH(0); /* write data */

From 808024689247561d3de225856fb6ef17430fd39e Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 21 Jul 2011 09:15:05 -0700
Subject: [PATCH 078/600] meta: Also save/restore clip planes for GLSL.

Fixes user-clip on 965 with 3D clears enabled.  I created a separate
flag because I wanted to avoid the overhead of the matrix operations
in this path.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/drivers/common/meta.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index fa78674e4eb..26c89519679 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -90,13 +90,14 @@
 #define META_SCISSOR         0x100
 #define META_SHADER          0x200
 #define META_STENCIL_TEST    0x400
-#define META_TRANSFORM       0x800 /**< modelview, projection, clip planes */
+#define META_TRANSFORM       0x800 /**< modelview/projection matrix state */
 #define META_TEXTURE        0x1000
 #define META_VERTEX         0x2000
 #define META_VIEWPORT       0x4000
 #define META_CLAMP_FRAGMENT_COLOR 0x8000
 #define META_CLAMP_VERTEX_COLOR 0x10000
 #define META_CONDITIONAL_RENDER 0x20000
+#define META_CLIP          0x40000
 /*@}*/
 
 
@@ -165,6 +166,8 @@ struct save_state
    GLfloat ModelviewMatrix[16];
    GLfloat ProjectionMatrix[16];
    GLfloat TextureMatrix[16];
+
+   /** META_CLIP */
    GLbitfield ClipPlanesEnabled;
 
    /** META_TEXTURE */
@@ -547,6 +550,9 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
       _mesa_Ortho(0.0, ctx->DrawBuffer->Width,
                   0.0, ctx->DrawBuffer->Height,
                   -1.0, 1.0);
+   }
+
+   if (state & META_CLIP) {
       save->ClipPlanesEnabled = ctx->Transform.ClipPlanesEnabled;
       if (ctx->Transform.ClipPlanesEnabled) {
          GLuint i;
@@ -846,7 +852,9 @@ _mesa_meta_end(struct gl_context *ctx)
       _mesa_LoadMatrixf(save->ProjectionMatrix);
 
       _mesa_MatrixMode(save->MatrixMode);
+   }
 
+   if (state & META_CLIP) {
       if (save->ClipPlanesEnabled) {
          GLuint i;
          for (i = 0; i < ctx->Const.MaxClipPlanes; i++) {
@@ -1669,6 +1677,7 @@ _mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers)
 	       META_STENCIL_TEST |
 	       META_VERTEX |
 	       META_VIEWPORT |
+	       META_CLIP |
 	       META_CLAMP_FRAGMENT_COLOR);
 
    if (!(buffers & BUFFER_BITS_COLOR)) {
@@ -1783,6 +1792,7 @@ _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcX, GLint srcY,
                           META_SHADER |
                           META_TEXTURE |
                           META_TRANSFORM |
+                          META_CLIP |
                           META_VERTEX |
                           META_VIEWPORT));
 
@@ -2104,6 +2114,7 @@ _mesa_meta_DrawPixels(struct gl_context *ctx,
                           META_SHADER |
                           META_TEXTURE |
                           META_TRANSFORM |
+                          META_CLIP |
                           META_VERTEX |
                           META_VIEWPORT |
 			  META_CLAMP_FRAGMENT_COLOR |
@@ -2313,6 +2324,7 @@ _mesa_meta_Bitmap(struct gl_context *ctx,
                           META_SHADER |
                           META_TEXTURE |
                           META_TRANSFORM |
+                          META_CLIP |
                           META_VERTEX |
                           META_VIEWPORT));
 

From a0e5affb22da50aeb30262f5ba0912b059d858ea Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 19 May 2011 11:02:14 -0700
Subject: [PATCH 079/600] i965: Use 3D clears on gen6+ to avoid inter-ring
 synchronization.

Improves firefox-talos-gfx around 5%.
---
 src/mesa/drivers/dri/intel/intel_clear.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c
index 81c062fba53..76d33f9b37e 100644
--- a/src/mesa/drivers/dri/intel/intel_clear.c
+++ b/src/mesa/drivers/dri/intel/intel_clear.c
@@ -116,13 +116,13 @@ intelClear(struct gl_context *ctx, GLbitfield mask)
    }
 
    /* HW color buffers (front, back, aux, generic FBO, etc) */
-   if (colorMask == ~0) {
+   if (intel->gen < 6 && colorMask == ~0) {
       /* clear all R,G,B,A */
       blit_mask |= (mask & BUFFER_BITS_COLOR);
    }
    else {
       /* glColorMask in effect */
-      tri_mask |= (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT));
+      tri_mask |= (mask & BUFFER_BITS_COLOR);
    }
 
    /* Make sure we have up to date buffers before we start looking at

From 818db3848bfaa002d0e7cf6b9b615a31eb82ba25 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 22 Jul 2011 10:56:10 -0700
Subject: [PATCH 080/600] i965: Fix many of the trivial WebGL demos that broke
 due to IB optimization.

The index buffer state emit only occurred if there was an IB in place
and we were in either a new batch or a new IB state.  But because we
only flagged new IB state if IB state changed from the last IB state
we calculated, we could simply never emit IB state after batchbuffer
wraps if the first draw didn't use the IB and we didn't actually
change the IB.

Fixes piglit glx-multi-context-ib-1.
---
 src/mesa/drivers/dri/i965/brw_vtbl.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index 55dbd4fa8b0..40360b23fff 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -213,6 +213,7 @@ static void brw_new_batch( struct intel_context *intel )
    brw->state_batch_count = 0;
 
    brw->vb.nr_current_buffers = 0;
+   brw->ib.type = -1;
 
    /* Mark that the current program cache BO has been used by the GPU.
     * It will be reallocated if we need to put new programs in for the

From 28a336dc38c478b809544e7404c4d1fddd873333 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Fri, 22 Jul 2011 18:58:30 +0200
Subject: [PATCH 081/600] winsys/radeon: simplify how value queries work

This drops the get_value query and adds a function query_info, which returns
all the values in one nice structure.
---
 src/gallium/drivers/r300/r300_chipset.c       |   6 +-
 src/gallium/drivers/r300/r300_chipset.h       |   8 +-
 src/gallium/drivers/r300/r300_context.c       |  20 +--
 src/gallium/drivers/r300/r300_emit.c          |  11 +-
 src/gallium/drivers/r300/r300_query.c         |   4 +-
 src/gallium/drivers/r300/r300_screen.c        |  11 +-
 src/gallium/drivers/r300/r300_screen.h        |  11 +-
 src/gallium/drivers/r300/r300_texture_desc.c  |   4 +-
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c |   4 +-
 .../winsys/radeon/drm/radeon_drm_winsys.c     | 154 ++++++++----------
 .../winsys/radeon/drm/radeon_drm_winsys.h     |  12 +-
 src/gallium/winsys/radeon/drm/radeon_winsys.h |  61 +++----
 12 files changed, 134 insertions(+), 172 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c
index 571986c3011..80148b80afb 100644
--- a/src/gallium/drivers/r300/r300_chipset.c
+++ b/src/gallium/drivers/r300/r300_chipset.c
@@ -31,9 +31,9 @@
  * Radeons. */
 
 /* Parse a PCI ID and fill an r300_capabilities struct with information. */
-void r300_parse_chipset(struct r300_capabilities* caps)
+void r300_parse_chipset(uint32_t pci_id, struct r300_capabilities* caps)
 {
-    switch (caps->pci_id) {
+    switch (pci_id) {
 #define CHIPSET(pci_id, name, chipfamily) \
         case pci_id: \
             caps->family = CHIP_FAMILY_##chipfamily; \
@@ -43,7 +43,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
 
     default:
         fprintf(stderr, "r300: Warning: Unknown chipset 0x%x\nAborting...",
-                caps->pci_id);
+                pci_id);
         abort();
     }
 
diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h
index 4df6b5b6292..f96cdaf2580 100644
--- a/src/gallium/drivers/r300/r300_chipset.h
+++ b/src/gallium/drivers/r300/r300_chipset.h
@@ -43,16 +43,10 @@ enum r300_zmask_compression {
 /* Structure containing all the possible information about a specific Radeon
  * in the R3xx, R4xx, and R5xx families. */
 struct r300_capabilities {
-    /* PCI ID */
-    uint32_t pci_id;
     /* Chipset family */
     int family;
     /* The number of vertex floating-point units */
     unsigned num_vert_fpus;
-    /* The number of fragment pipes */
-    unsigned num_frag_pipes;
-    /* The number of z pipes */
-    unsigned num_z_pipes;
     /* The number of texture units. */
     unsigned num_tex_units;
     /* Whether or not TCL is physically present */
@@ -121,6 +115,6 @@ enum {
     CHIP_FAMILY_RV570
 };
 
-void r300_parse_chipset(struct r300_capabilities* caps);
+void r300_parse_chipset(uint32_t pci_id, struct r300_capabilities* caps);
 
 #endif /* R300_CHIPSET_H */
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index d94ac74f0e5..2b3329e9f86 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -173,7 +173,7 @@ static boolean r300_setup_atoms(struct r300_context* r300)
     boolean is_rv350 = r300->screen->caps.is_rv350;
     boolean is_r500 = r300->screen->caps.is_r500;
     boolean has_tcl = r300->screen->caps.has_tcl;
-    boolean drm_2_6_0 = r300->rws->get_value(r300->rws, RADEON_VID_DRM_2_6_0);
+    boolean drm_2_6_0 = r300->screen->info.drm_minor >= 6;
 
     /* Create the actual atom list.
      *
@@ -380,7 +380,7 @@ static void r300_init_states(struct pipe_context *pipe)
 
         if (r300->screen->caps.is_r500 ||
             (r300->screen->caps.is_rv350 &&
-             r300->rws->get_value(r300->rws, RADEON_VID_DRM_2_6_0))) {
+             r300->screen->info.drm_minor >= 6)) {
             OUT_CB_REG(R300_GB_Z_PEQ_CONFIG, 0);
         }
         END_CB;
@@ -520,15 +520,15 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
                 "r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n"
                 "r300: GART size: %d MB, VRAM size: %d MB\n"
                 "r300: AA compression RAM: %s, Z compression RAM: %s, HiZ RAM: %s\n",
-                rws->get_value(rws, RADEON_VID_DRM_MAJOR),
-                rws->get_value(rws, RADEON_VID_DRM_MINOR),
-                rws->get_value(rws, RADEON_VID_DRM_PATCHLEVEL),
+                r300->screen->info.drm_major,
+                r300->screen->info.drm_minor,
+                r300->screen->info.drm_patchlevel,
                 screen->get_name(screen),
-                rws->get_value(rws, RADEON_VID_PCI_ID),
-                rws->get_value(rws, RADEON_VID_R300_GB_PIPES),
-                rws->get_value(rws, RADEON_VID_R300_Z_PIPES),
-                rws->get_value(rws, RADEON_VID_GART_SIZE) >> 20,
-                rws->get_value(rws, RADEON_VID_VRAM_SIZE) >> 20,
+                r300->screen->info.pci_id,
+                r300->screen->info.r300_num_gb_pipes,
+                r300->screen->info.r300_num_z_pipes,
+                r300->screen->info.gart_size >> 20,
+                r300->screen->info.vram_size >> 20,
                 "YES", /* XXX really? */
                 r300->screen->caps.zmask_ram ? "YES" : "NO",
                 r300->screen->caps.hiz_ram ? "YES" : "NO");
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index d214af4cd5b..502aed3a20c 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -574,11 +574,12 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300,
                                            struct r300_query *query)
 {
     struct r300_capabilities* caps = &r300->screen->caps;
+    uint32_t gb_pipes = r300->screen->info.r300_num_gb_pipes;
     CS_LOCALS(r300);
 
-    assert(caps->num_frag_pipes);
+    assert(gb_pipes);
 
-    BEGIN_CS(6 * caps->num_frag_pipes + 2);
+    BEGIN_CS(6 * gb_pipes + 2);
     /* I'm not so sure I like this switch, but it's hard to be elegant
      * when there's so many special cases...
      *
@@ -587,7 +588,7 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300,
      * 4-byte offset for each pipe. RV380 and older are special; they have
      * only two pipes, and the second pipe's enable is on bit 3, not bit 1,
      * so there's a chipset cap for that. */
-    switch (caps->num_frag_pipes) {
+    switch (gb_pipes) {
         case 4:
             /* pipe 3 only */
             OUT_CS_REG(R300_SU_REG_DEST, 1 << 3);
@@ -613,7 +614,7 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300,
             break;
         default:
             fprintf(stderr, "r300: Implementation error: Chipset reports %d"
-                    " pixel pipes!\n", caps->num_frag_pipes);
+                    " pixel pipes!\n", gb_pipes);
             abort();
     }
 
@@ -663,7 +664,7 @@ void r300_emit_query_end(struct r300_context* r300)
         return;
 
     if (caps->family == CHIP_FAMILY_RV530) {
-        if (caps->num_z_pipes == 2)
+        if (r300->screen->info.r300_num_z_pipes == 2)
             rv530_emit_query_end_double_z(r300, query);
         else
             rv530_emit_query_end_single_z(r300, query);
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index 782f041e926..000114129bf 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -49,9 +49,9 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe,
     q->buffer_size = 4096;
 
     if (r300screen->caps.family == CHIP_FAMILY_RV530)
-        q->num_pipes = r300screen->caps.num_z_pipes;
+        q->num_pipes = r300screen->info.r300_num_z_pipes;
     else
-        q->num_pipes = r300screen->caps.num_frag_pipes;
+        q->num_pipes = r300screen->info.r300_num_gb_pipes;
 
     insert_at_tail(&r300->query_list, q);
 
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index c8df45fb3e7..d9378308ad0 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -327,9 +327,8 @@ static boolean r300_is_format_supported(struct pipe_screen* screen,
                                         unsigned sample_count,
                                         unsigned usage)
 {
-    struct radeon_winsys *rws = r300_screen(screen)->rws;
     uint32_t retval = 0;
-    boolean drm_2_8_0 = rws->get_value(rws, RADEON_VID_DRM_2_8_0);
+    boolean drm_2_8_0 = r300_screen(screen)->info.drm_minor >= 8;
     boolean is_r500 = r300_screen(screen)->caps.is_r500;
     boolean is_r400 = r300_screen(screen)->caps.is_r400;
     boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM ||
@@ -497,19 +496,17 @@ struct pipe_screen* r300_screen_create(struct radeon_winsys *rws)
         return NULL;
     }
 
-    r300screen->caps.pci_id = rws->get_value(rws, RADEON_VID_PCI_ID);
-    r300screen->caps.num_frag_pipes = rws->get_value(rws, RADEON_VID_R300_GB_PIPES);
-    r300screen->caps.num_z_pipes = rws->get_value(rws, RADEON_VID_R300_Z_PIPES);
+    rws->query_info(rws, &r300screen->info);
 
     r300_init_debug(r300screen);
-    r300_parse_chipset(&r300screen->caps);
+    r300_parse_chipset(r300screen->info.pci_id, &r300screen->caps);
 
     if (SCREEN_DBG_ON(r300screen, DBG_NO_ZMASK))
         r300screen->caps.zmask_ram = 0;
     if (SCREEN_DBG_ON(r300screen, DBG_NO_HIZ))
         r300screen->caps.hiz_ram = 0;
 
-    if (!rws->get_value(rws, RADEON_VID_DRM_2_8_0))
+    if (r300screen->info.drm_minor < 8)
         r300screen->caps.has_us_format = FALSE;
 
     pipe_mutex_init(r300screen->num_contexts_mutex);
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index e5c53bf3500..82b2068e7a0 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -24,23 +24,20 @@
 #ifndef R300_SCREEN_H
 #define R300_SCREEN_H
 
-#include "pipe/p_screen.h"
-
 #include "r300_chipset.h"
-
+#include "../../winsys/radeon/drm/radeon_winsys.h"
+#include "pipe/p_screen.h"
 #include "util/u_slab.h"
-
 #include <stdio.h>
 
-struct radeon_winsys;
-
 struct r300_screen {
     /* Parent class */
     struct pipe_screen screen;
 
     struct radeon_winsys *rws;
 
-    /* Chipset capabilities */
+    /* Chipset info and capabilities. */
+    struct radeon_info info;
     struct r300_capabilities caps;
 
     /* Memory pools. */
diff --git a/src/gallium/drivers/r300/r300_texture_desc.c b/src/gallium/drivers/r300/r300_texture_desc.c
index da5778be65e..fe4f8dd5679 100644
--- a/src/gallium/drivers/r300/r300_texture_desc.c
+++ b/src/gallium/drivers/r300/r300_texture_desc.c
@@ -360,9 +360,9 @@ static void r300_setup_hyperz_properties(struct r300_screen *screen,
         unsigned i, pipes;
 
         if (screen->caps.family == CHIP_FAMILY_RV530) {
-            pipes = screen->caps.num_z_pipes;
+            pipes = screen->info.r300_num_z_pipes;
         } else {
-            pipes = screen->caps.num_frag_pipes;
+            pipes = screen->info.r300_num_gb_pipes;
         }
 
         for (i = 0; i <= tex->b.b.b.last_level; i++) {
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 0139de1973a..f0f4a70be3f 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -308,8 +308,8 @@ static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
 {
     struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
 
-    return cs->csc->used_gart < cs->ws->gart_size * 0.8 &&
-           cs->csc->used_vram < cs->ws->vram_size * 0.8;
+    return cs->csc->used_gart < cs->ws->info.gart_size * 0.8 &&
+           cs->csc->used_vram < cs->ws->info.vram_size * 0.8;
 }
 
 static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 0474b381ade..473f388d121 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -103,17 +103,31 @@ static boolean radeon_set_fd_access(struct radeon_drm_cs *applier,
     return FALSE;
 }
 
+static boolean radeon_get_drm_value(int fd, unsigned request,
+                                    const char *name, uint32_t *out)
+{
+    struct drm_radeon_info info = {0};
+    int retval;
+
+    info.value = (unsigned long)out;
+    info.request = request;
+
+    retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info));
+    if (retval) {
+        fprintf(stderr, "%s: Failed to get %s, error number %d\n",
+                __func__, name, retval);
+        return FALSE;
+    }
+    return TRUE;
+}
+
 /* Helper function to do the ioctls needed for setup and init. */
-static void do_ioctls(struct radeon_drm_winsys *winsys)
+static boolean do_winsys_init(struct radeon_drm_winsys *ws)
 {
     struct drm_radeon_gem_info gem_info = {0};
-    struct drm_radeon_info info = {0};
-    int target = 0;
     int retval;
     drmVersionPtr version;
 
-    info.value = (unsigned long)&target;
-
     /* We do things in a specific order here.
      *
      * DRM version first. We need to be sure we're running on a KMS chipset.
@@ -123,71 +137,76 @@ static void do_ioctls(struct radeon_drm_winsys *winsys)
      * for all Radeons. If this fails, we probably got handed an FD for some
      * non-Radeon card.
      *
+     * The GEM info is actually bogus on the kernel side, as well as our side
+     * (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because
+     * we don't actually use the info for anything yet.
+     *
      * The GB and Z pipe requests should always succeed, but they might not
      * return sensical values for all chipsets, but that's alright because
      * the pipe drivers already know that.
-     *
-     * The GEM info is actually bogus on the kernel side, as well as our side
-     * (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because
-     * we don't actually use the info for anything yet. */
+     */
 
-    version = drmGetVersion(winsys->fd);
+    /* Get DRM version. */
+    version = drmGetVersion(ws->fd);
     if (version->version_major != 2 ||
         version->version_minor < 3) {
         fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is "
-                "only compatible with 2.3.x (kernel 2.6.34) and later.\n",
+                "only compatible with 2.3.x (kernel 2.6.34) or later.\n",
                 __FUNCTION__,
                 version->version_major,
                 version->version_minor,
                 version->version_patchlevel);
         drmFreeVersion(version);
-        exit(1);
+        return FALSE;
     }
 
-    winsys->drm_major = version->version_major;
-    winsys->drm_minor = version->version_minor;
-    winsys->drm_patchlevel = version->version_patchlevel;
+    ws->info.drm_major = version->version_major;
+    ws->info.drm_minor = version->version_minor;
+    ws->info.drm_patchlevel = version->version_patchlevel;
+    drmFreeVersion(version);
 
-    info.request = RADEON_INFO_DEVICE_ID;
-    retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_INFO, &info, sizeof(info));
-    if (retval) {
-        fprintf(stderr, "%s: Failed to get PCI ID, "
-                "error number %d\n", __FUNCTION__, retval);
-        exit(1);
+    /* Get PCI ID. */
+    if (!radeon_get_drm_value(ws->fd, RADEON_INFO_DEVICE_ID, "PCI ID",
+                              &ws->info.pci_id))
+        return FALSE;
+
+    /* Check PCI ID. */
+    switch (ws->info.pci_id) {
+#define CHIPSET(pci_id, name, family) case pci_id:
+#include "pci_ids/r300_pci_ids.h"
+#undef CHIPSET
+        break;
+
+    default:
+        fprintf(stderr, "radeon: Invalid PCI ID.\n");
+        return FALSE;
     }
-    winsys->pci_id = target;
 
-    info.request = RADEON_INFO_NUM_GB_PIPES;
-    retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_INFO, &info, sizeof(info));
-    if (retval) {
-        fprintf(stderr, "%s: Failed to get GB pipe count, "
-                "error number %d\n", __FUNCTION__, retval);
-        exit(1);
-    }
-    winsys->gb_pipes = target;
-
-    info.request = RADEON_INFO_NUM_Z_PIPES;
-    retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_INFO, &info, sizeof(info));
-    if (retval) {
-        fprintf(stderr, "%s: Failed to get Z pipe count, "
-                "error number %d\n", __FUNCTION__, retval);
-        exit(1);
-    }
-    winsys->z_pipes = target;
-
-    retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_GEM_INFO,
+    /* Get GEM info. */
+    retval = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_INFO,
             &gem_info, sizeof(gem_info));
     if (retval) {
         fprintf(stderr, "%s: Failed to get MM info, error number %d\n",
                 __FUNCTION__, retval);
-        exit(1);
+        return FALSE;
     }
-    winsys->gart_size = gem_info.gart_size;
-    winsys->vram_size = gem_info.vram_size;
+    ws->info.gart_size = gem_info.gart_size;
+    ws->info.vram_size = gem_info.vram_size;
 
-    drmFreeVersion(version);
+    ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
 
-    winsys->num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+    /* Generation-specific queries. */
+    if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_GB_PIPES,
+                              "GB pipe count",
+                              &ws->info.r300_num_gb_pipes))
+        return FALSE;
+
+    if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_Z_PIPES,
+                              "Z pipe count",
+                              &ws->info.r300_num_z_pipes))
+        return FALSE;
+
+    return TRUE;
 }
 
 static void radeon_winsys_destroy(struct radeon_winsys *rws)
@@ -202,34 +221,10 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws)
     FREE(rws);
 }
 
-static uint32_t radeon_get_value(struct radeon_winsys *rws,
-                                 enum radeon_value_id id)
+static void radeon_query_info(struct radeon_winsys *rws,
+                              struct radeon_info *info)
 {
-    struct radeon_drm_winsys *ws = (struct radeon_drm_winsys *)rws;
-
-    switch(id) {
-    case RADEON_VID_PCI_ID:
-	return ws->pci_id;
-    case RADEON_VID_R300_GB_PIPES:
-	return ws->gb_pipes;
-    case RADEON_VID_R300_Z_PIPES:
-	return ws->z_pipes;
-    case RADEON_VID_GART_SIZE:
-        return ws->gart_size;
-    case RADEON_VID_VRAM_SIZE:
-        return ws->vram_size;
-    case RADEON_VID_DRM_MAJOR:
-        return ws->drm_major;
-    case RADEON_VID_DRM_MINOR:
-        return ws->drm_minor;
-    case RADEON_VID_DRM_PATCHLEVEL:
-        return ws->drm_patchlevel;
-    case RADEON_VID_DRM_2_6_0:
-        return ws->drm_major*100 + ws->drm_minor >= 206;
-    case RADEON_VID_DRM_2_8_0:
-        return ws->drm_major*100 + ws->drm_minor >= 208;
-    }
-    return 0;
+    *info = ((struct radeon_drm_winsys *)rws)->info;
 }
 
 static boolean radeon_cs_request_feature(struct radeon_winsys_cs *rcs,
@@ -268,16 +263,9 @@ struct radeon_winsys *radeon_drm_winsys_create(int fd)
     }
 
     ws->fd = fd;
-    do_ioctls(ws);
 
-    switch (ws->pci_id) {
-#define CHIPSET(pci_id, name, family) case pci_id:
-#include "pci_ids/r300_pci_ids.h"
-#undef CHIPSET
-       break;
-    default:
-       goto fail;
-    }
+    if (!do_winsys_init(ws))
+        goto fail;
 
     /* Create managers. */
     ws->kman = radeon_bomgr_create(ws);
@@ -289,7 +277,7 @@ struct radeon_winsys *radeon_drm_winsys_create(int fd)
 
     /* Set functions. */
     ws->base.destroy = radeon_winsys_destroy;
-    ws->base.get_value = radeon_get_value;
+    ws->base.query_info = radeon_query_info;
     ws->base.cs_request_feature = radeon_cs_request_feature;
 
     radeon_bomgr_init_functions(ws);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
index d5186bc4d17..347e1f1d11a 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
@@ -31,7 +31,6 @@
 #define RADEON_DRM_WINSYS_H
 
 #include "radeon_winsys.h"
-
 #include "os/os_thread.h"
 
 struct radeon_drm_winsys {
@@ -40,20 +39,13 @@ struct radeon_drm_winsys {
     int fd; /* DRM file descriptor */
     int num_cs; /* The number of command streams created. */
 
+    struct radeon_info info;
+
     struct pb_manager *kman;
     struct pb_manager *cman;
 
-    uint32_t pci_id;        /* PCI ID */
-    uint32_t gb_pipes;      /* GB pipe count */
-    uint32_t z_pipes;       /* Z pipe count (rv530 only) */
-    uint32_t gart_size;     /* GART size. */
-    uint32_t vram_size;     /* VRAM size. */
     uint32_t num_cpus;      /* Number of CPUs. */
 
-    unsigned drm_major;
-    unsigned drm_minor;
-    unsigned drm_patchlevel;
-
     struct radeon_drm_cs *hyperz_owner;
     pipe_mutex hyperz_owner_mutex;
     struct radeon_drm_cs *cmask_owner;
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index 3a64e4abc35..915a9c5bad1 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -26,6 +26,20 @@
 
 /* The public winsys interface header for the radeon driver. */
 
+/* R300 features in DRM.
+ *
+ * 2.6.0:
+ * - Hyper-Z
+ * - GB_Z_PEQ_CONFIG on rv350->r4xx
+ * - R500 FG_ALPHA_VALUE
+ *
+ * 2.8.0:
+ * - R500 US_FORMAT regs
+ * - R500 ARGB2101010 colorbuffer
+ * - CMask and AA regs
+ * - R16F/RG16F
+ */
+
 #include "pipebuffer/pb_bufmgr.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_state.h"
@@ -55,38 +69,17 @@ struct radeon_winsys_cs {
     uint32_t *buf; /* The command buffer. */
 };
 
-enum radeon_value_id {
-    RADEON_VID_PCI_ID,
-    RADEON_VID_R300_GB_PIPES,
-    RADEON_VID_R300_Z_PIPES,
-    RADEON_VID_GART_SIZE,
-    RADEON_VID_VRAM_SIZE,
-    RADEON_VID_DRM_MAJOR,
-    RADEON_VID_DRM_MINOR,
-    RADEON_VID_DRM_PATCHLEVEL,
+struct radeon_info {
+    uint32_t pci_id;
+    uint32_t gart_size;
+    uint32_t vram_size;
 
-    /* These should probably go away: */
+    uint32_t drm_major; /* version */
+    uint32_t drm_minor;
+    uint32_t drm_patchlevel;
 
-    /* R300 features:
-     * - Hyper-Z
-     * - GB_Z_PEQ_CONFIG on rv350->r4xx
-     * - R500 FG_ALPHA_VALUE
-     *
-     * R600 features:
-     * - TBD
-     */
-    RADEON_VID_DRM_2_6_0,
-
-    /* R300 features:
-     * - R500 US_FORMAT regs
-     * - R500 ARGB2101010 colorbuffer
-     * - CMask and AA regs
-     * - R16F/RG16F
-     *
-     * R600 features:
-     * - TBD
-     */
-    RADEON_VID_DRM_2_8_0,
+    uint32_t r300_num_gb_pipes;
+    uint32_t r300_num_z_pipes;
 };
 
 enum radeon_feature_id {
@@ -103,13 +96,13 @@ struct radeon_winsys {
     void (*destroy)(struct radeon_winsys *ws);
 
     /**
-     * Query a system value from a winsys.
+     * Query an info structure from winsys.
      *
      * \param ws        The winsys this function is called from.
-     * \param vid       One of the RADEON_VID_* enums.
+     * \param info      Return structure
      */
-    uint32_t (*get_value)(struct radeon_winsys *ws,
-                          enum radeon_value_id vid);
+    void (*query_info)(struct radeon_winsys *ws,
+                       struct radeon_info *info);
 
     /**************************************************************************
      * Buffer management. Buffer attributes are mostly fixed over its lifetime.

From ce9daf6f0bda857c9ee5d021cfb444db6376bfe7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Fri, 22 Jul 2011 19:14:23 +0200
Subject: [PATCH 082/600] winsys/radeon: add R300 infix to winsys feature names

---
 src/gallium/drivers/r300/r300_blit.c              | 2 +-
 src/gallium/drivers/r300/r300_context.c           | 2 +-
 src/gallium/drivers/r300/r300_flush.c             | 2 +-
 src/gallium/targets/dri-r300/target.c             | 1 -
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 4 ++--
 src/gallium/winsys/radeon/drm/radeon_winsys.h     | 4 ++--
 6 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index 388ebcdbf32..db97e496e19 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -247,7 +247,7 @@ static void r300_clear(struct pipe_context* pipe,
             if (!r300->hyperz_enabled) {
                 r300->hyperz_enabled =
                     r300->rws->cs_request_feature(r300->cs,
-                                                RADEON_FID_HYPERZ_RAM_ACCESS,
+                                                RADEON_FID_R300_HYPERZ_ACCESS,
                                                 TRUE);
                 if (r300->hyperz_enabled) {
                    /* Need to emit HyperZ buffer regs for the first time. */
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 2b3329e9f86..5c222588e47 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -99,7 +99,7 @@ static void r300_destroy_context(struct pipe_context* context)
     struct r300_context* r300 = r300_context(context);
 
     if (r300->cs && r300->hyperz_enabled) {
-        r300->rws->cs_request_feature(r300->cs, RADEON_FID_HYPERZ_RAM_ACCESS, FALSE);
+        r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_HYPERZ_ACCESS, FALSE);
     }
 
     if (r300->blitter)
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index 34f5419a864..4c6beea5a55 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -121,7 +121,7 @@ void r300_flush(struct pipe_context *pipe,
         }
 
         /* Release HyperZ. */
-        r300->rws->cs_request_feature(r300->cs, RADEON_FID_HYPERZ_RAM_ACCESS,
+        r300->rws->cs_request_feature(r300->cs, RADEON_FID_R300_HYPERZ_ACCESS,
                                       FALSE);
     }
     r300->num_z_clears = 0;
diff --git a/src/gallium/targets/dri-r300/target.c b/src/gallium/targets/dri-r300/target.c
index b48bcad3710..9b6d816fb62 100644
--- a/src/gallium/targets/dri-r300/target.c
+++ b/src/gallium/targets/dri-r300/target.c
@@ -1,4 +1,3 @@
-
 #include "target-helpers/inline_debug_helper.h"
 #include "state_tracker/drm_driver.h"
 #include "radeon/drm/radeon_drm_public.h"
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 473f388d121..5983e86c570 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -234,7 +234,7 @@ static boolean radeon_cs_request_feature(struct radeon_winsys_cs *rcs,
     struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
 
     switch (fid) {
-    case RADEON_FID_HYPERZ_RAM_ACCESS:
+    case RADEON_FID_R300_HYPERZ_ACCESS:
         if (debug_get_bool_option("RADEON_HYPERZ", FALSE)) {
             return radeon_set_fd_access(cs, &cs->ws->hyperz_owner,
                                         &cs->ws->hyperz_owner_mutex,
@@ -243,7 +243,7 @@ static boolean radeon_cs_request_feature(struct radeon_winsys_cs *rcs,
             return FALSE;
         }
 
-    case RADEON_FID_CMASK_RAM_ACCESS:
+    case RADEON_FID_R300_CMASK_ACCESS:
         if (debug_get_bool_option("RADEON_CMASK", FALSE)) {
             return radeon_set_fd_access(cs, &cs->ws->cmask_owner,
                                         &cs->ws->cmask_owner_mutex,
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index 915a9c5bad1..4fcda4f1b11 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -83,8 +83,8 @@ struct radeon_info {
 };
 
 enum radeon_feature_id {
-    RADEON_FID_HYPERZ_RAM_ACCESS,     /* ZMask + HiZ */
-    RADEON_FID_CMASK_RAM_ACCESS,
+    RADEON_FID_R300_HYPERZ_ACCESS,     /* ZMask + HiZ */
+    RADEON_FID_R300_CMASK_ACCESS,
 };
 
 struct radeon_winsys {

From 67c995e0f1b50ff08784e97482ca3e9e0bfd42ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Fri, 22 Jul 2011 19:20:25 +0200
Subject: [PATCH 083/600] winsys/radeon: little change in
 radeon_bo_is_referenced_by_cs

---
 src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
index 339beedc6ab..b8b170adcbe 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -88,8 +88,9 @@ static INLINE boolean
 radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs,
                               struct radeon_bo *bo)
 {
-    return bo->num_cs_references == bo->rws->num_cs ||
-           (bo->num_cs_references && radeon_get_reloc(cs->csc, bo) != -1);
+    int num_refs = bo->num_cs_references;
+    return num_refs == bo->rws->num_cs ||
+           (num_refs && radeon_get_reloc(cs->csc, bo) != -1);
 }
 
 static INLINE boolean

From e22a1005c0913b404ae82650cdc4f58bcbd5445b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Fri, 22 Jul 2011 19:22:50 +0200
Subject: [PATCH 084/600] winsys/radeon: fix int->boolean conversion in
 radeon_bo_is_referenced_by_any_cs

---
 src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
index b8b170adcbe..ea2a820b30a 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -112,7 +112,7 @@ radeon_bo_is_referenced_by_cs_for_write(struct radeon_drm_cs *cs,
 static INLINE boolean
 radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo)
 {
-    return bo->num_cs_references;
+    return bo->num_cs_references != 0;
 }
 
 void radeon_drm_cs_sync_flush(struct radeon_drm_cs *cs);

From 7db148d3a5a350f80df8dc588e0079fda7aa378a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Sat, 23 Jul 2011 04:11:31 +0200
Subject: [PATCH 085/600] winsys/radeon: remove usage parameter from
 buffer_create

---
 src/gallium/drivers/r300/r300_flush.c         | 1 -
 src/gallium/drivers/r300/r300_query.c         | 3 +--
 src/gallium/drivers/r300/r300_screen_buffer.c | 3 +--
 src/gallium/drivers/r300/r300_texture.c       | 2 +-
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 6 ++----
 src/gallium/winsys/radeon/drm/radeon_winsys.h | 2 --
 6 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index 4c6beea5a55..dc596c4122a 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -76,7 +76,6 @@ void r300_flush(struct pipe_context *pipe,
         /* Create a fence, which is a dummy BO. */
         *rfence = r300->rws->buffer_create(r300->rws, 1, 1,
                                            PIPE_BIND_VERTEX_BUFFER,
-                                           PIPE_USAGE_STATIC,
                                            RADEON_DOMAIN_GTT);
         /* Add the fence as a dummy relocation. */
         r300->rws->cs_add_reloc(r300->cs,
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index 000114129bf..c0357f9d035 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -57,8 +57,7 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe,
 
     /* Open up the occlusion query buffer. */
     q->buf = r300->rws->buffer_create(r300->rws, q->buffer_size, 4096,
-                                         PIPE_BIND_CUSTOM, PIPE_USAGE_STREAM,
-                                         q->domain);
+                                         PIPE_BIND_CUSTOM, q->domain);
     q->cs_buf = r300->rws->buffer_get_cs_handle(q->buf);
 
     return (struct pipe_query*)q;
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c
index 4154c81512e..c751a943b96 100644
--- a/src/gallium/drivers/r300/r300_screen_buffer.c
+++ b/src/gallium/drivers/r300/r300_screen_buffer.c
@@ -201,8 +201,7 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
     rbuf->buf =
         r300screen->rws->buffer_create(r300screen->rws,
                                        rbuf->b.b.b.width0, alignment,
-                                       rbuf->b.b.b.bind, rbuf->b.b.b.usage,
-                                       rbuf->domain);
+                                       rbuf->b.b.b.bind, rbuf->domain);
     if (!rbuf->buf) {
         util_slab_free(&r300screen->pool_buffers, rbuf);
         return NULL;
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 62c2f1fff6c..08fccbe51c5 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -926,7 +926,7 @@ r300_texture_create_object(struct r300_screen *rscreen,
     if (!buffer) {
         tex->buf_size = tex->tex.size_in_bytes;
         tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048,
-                                         base->bind, base->usage, tex->domain);
+                                      base->bind, tex->domain);
 
         if (!tex->buf) {
             FREE(tex);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index b6f12727e81..7f02a5abda5 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -468,8 +468,7 @@ static struct radeon_winsys_cs_handle *radeon_drm_get_cs_handle(
             get_radeon_bo(pb_buffer(_buf));
 }
 
-static unsigned get_pb_usage_from_create_flags(unsigned bind, unsigned usage,
-                                               enum radeon_bo_domain domain)
+static unsigned get_pb_usage_from_create_flags(enum radeon_bo_domain domain)
 {
     unsigned res = 0;
 
@@ -487,7 +486,6 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
                         unsigned size,
                         unsigned alignment,
                         unsigned bind,
-                        unsigned usage,
                         enum radeon_bo_domain domain)
 {
     struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
@@ -497,7 +495,7 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
 
     memset(&desc, 0, sizeof(desc));
     desc.alignment = alignment;
-    desc.usage = get_pb_usage_from_create_flags(bind, usage, domain);
+    desc.usage = get_pb_usage_from_create_flags(domain);
 
     /* Assign a buffer manager. */
     if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index 4fcda4f1b11..2a0f025ebc4 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -119,7 +119,6 @@ struct radeon_winsys {
      * \param size      The size to allocate.
      * \param alignment An alignment of the buffer in memory.
      * \param bind      A bitmask of the PIPE_BIND_* flags.
-     * \param usage     A bitmask of the PIPE_USAGE_* flags.
      * \param domain    A bitmask of the RADEON_DOMAIN_* flags.
      * \return          The created buffer object.
      */
@@ -127,7 +126,6 @@ struct radeon_winsys {
                                        unsigned size,
                                        unsigned alignment,
                                        unsigned bind,
-                                       unsigned usage,
                                        enum radeon_bo_domain domain);
 
     struct radeon_winsys_cs_handle *(*buffer_get_cs_handle)(

From 533e2289235c61eff9a14bb24da7c8a1ff0b0afa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Fri, 22 Jul 2011 22:14:39 +0200
Subject: [PATCH 086/600] winsys/radeon: manage constant buffers by the cache
 bufmgr too

---
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 7f02a5abda5..796262ccfdb 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -498,7 +498,8 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
     desc.usage = get_pb_usage_from_create_flags(domain);
 
     /* Assign a buffer manager. */
-    if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
+    if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER |
+                PIPE_BIND_CONSTANT_BUFFER))
 	provider = ws->cman;
     else
         provider = ws->kman;

From f170555a18a742ed8ecb9e04cd02a5cb414c27ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Sun, 24 Jul 2011 23:59:44 +0200
Subject: [PATCH 087/600] winsys/radeon: fix typos in the driver interface

---
 src/gallium/winsys/radeon/drm/radeon_winsys.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index 2a0f025ebc4..f8a4d3abd43 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -297,7 +297,7 @@ struct radeon_winsys {
      */
     void (*cs_set_flush)(struct radeon_winsys_cs *cs,
                          void (*flush)(void *ctx, unsigned flags),
-                         void *user);
+                         void *ctx);
 
     /**
      * Return TRUE if a buffer is referenced by a command stream.
@@ -312,7 +312,8 @@ struct radeon_winsys {
      * Request access to a feature for a command stream.
      *
      * \param cs        A command stream.
-     * \param fid       A winsys buffer.
+     * \param fid       Feature ID, one of RADEON_FID_*
+     * \param enable	Whether to enable or disable the feature.
      */
     boolean (*cs_request_feature)(struct radeon_winsys_cs *cs,
                                   enum radeon_feature_id fid,

From 84f8548dfcc7de55e162359e2e39af2614903cbe Mon Sep 17 00:00:00 2001
From: Tobias Droste <tdroste@gmx.de>
Date: Mon, 18 Jul 2011 07:14:06 +0200
Subject: [PATCH 088/600] r300/compiler: simplify code in
 peephole_add_presub_add
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Tobias Droste <tdroste@gmx.de>
Signed-off-by: Marek Olšák <maraeo@gmail.com>
---
 .../dri/r300/compiler/radeon_optimize.c       | 35 ++++++++++---------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index b24274259f4..39dcb21d4f4 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -561,28 +561,29 @@ static int peephole_add_presub_add(
 	struct rc_instruction * inst_add)
 {
 	unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
-	struct rc_src_register * src1 = NULL;
-	unsigned int i;
-
-	if (!is_presub_candidate(c, inst_add))
-		return 0;
+        unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
+        unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
 
 	if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
 		return 0;
 
-	/* XXX This isn't fully implemented, is it? */
-	/*   src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */
-	for (i = 0; i < 2; i++) {
-		if (inst_add->U.I.SrcReg[i].Abs)
-			return 0;
+	/* src0 and src1 can't have absolute values */
+	if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
+	        return 0;
 
-		/* XXX This looks weird, but it's basically what was here before this commit (see git blame): */
-		if ((inst_add->U.I.SrcReg[i].Negate & dstmask) != dstmask && !src1) {
-			src1 = &inst_add->U.I.SrcReg[i];
-		}
-	}
+	/* presub_replace_add() assumes only one is negative */
+	if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
+	        return 0;
 
-	if (!src1)
+        /* if src0 is negative, at least all bits of dstmask have to be set */
+        if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
+	        return 0;
+
+        /* if src1 is negative, at least all bits of dstmask have to be set */
+        if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
+	        return 0;
+
+	if (!is_presub_candidate(c, inst_add))
 		return 0;
 
 	if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
@@ -615,7 +616,7 @@ static void presub_replace_inv(
  * of the add instruction must have the constatnt 1 swizzle.  This function
  * does not check const registers to see if their value is 1.0, so it should
  * be called after the constant_folding optimization.
- * @return 
+ * @return
  * 	0 if the ADD instruction is still part of the program.
  * 	1 if the ADD instruction is no longer part of the program.
  */

From 99fba503b112a69a2fc14b7dd40684d9a6a1972a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Sat, 23 Jul 2011 15:57:51 +0200
Subject: [PATCH 089/600] configure.ac: do not check for llvm-config if llvm is
 disabled

NOTE: This is a candidate for the 7.11 branch.
---
 configure.ac | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/configure.ac b/configure.ac
index 0ea264ef042..5c832e64669 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1699,9 +1699,6 @@ dnl Gallium configuration
 dnl
 if test "x$with_gallium_drivers" != x; then
     SRC_DIRS="$SRC_DIRS gallium gallium/winsys gallium/targets"
-    AC_PATH_PROG([LLVM_CONFIG], [llvm-config], [no])
-else
-    LLVM_CONFIG=no
 fi
 
 AC_SUBST([LLVM_CFLAGS])
@@ -1821,6 +1818,8 @@ if test "x$enable_gallium_llvm" = xauto; then
     esac
 fi
 if test "x$enable_gallium_llvm" = xyes; then
+    AC_PATH_PROG([LLVM_CONFIG], [llvm-config], [no])
+
     if test "x$LLVM_CONFIG" != xno; then
 	LLVM_VERSION=`$LLVM_CONFIG --version`
 	LLVM_CFLAGS=`$LLVM_CONFIG --cppflags|sed 's/-DNDEBUG\>//g'`

From c1e591eed41b45c0fcf1dcac8b1b8aaeb6237a38 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 25 Jul 2011 18:33:40 -0700
Subject: [PATCH 090/600] glsl: Correctly return progress from
 lower_variable_index_to_cond_assign

lower_variable_index_to_cond_assign runs until it can't make any more
progress.  It then returns the result of the last pass which will
always be false.  This caused the lowering loop in
_mesa_ir_link_shader to end before doing one last round of
lower_if_to_cond_assign.  This caused several if-statements (resulting
from lower_variable_index_to_cond_assign) to be left in the IR.

In addition to this change, lower_variable_index_to_cond_assign should
take a flag indicating whether or not it should even generate
if-statements.  This is easily controlled by
switch_generator::linear_sequence_max_length.  This would generate
much better code on architectures without any flow contol.

Fixes i915 piglit regressions glsl-texcoord-array and
glsl-fs-vec4-indexing-temp-src.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/glsl/lower_variable_index_to_cond_assign.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp
index 7792e6e624f..f8e4a1de428 100644
--- a/src/glsl/lower_variable_index_to_cond_assign.cpp
+++ b/src/glsl/lower_variable_index_to_cond_assign.cpp
@@ -525,10 +525,12 @@ lower_variable_index_to_cond_assign(exec_list *instructions,
     * matrix columns of an array of matrix), each pass will only lower one
     * level of indirection.
     */
+   bool progress_ever = false;
    do {
       v.progress = false;
       visit_list_elements(&v, instructions);
+      progress_ever = v.progress || progress_ever;
    } while (v.progress);
 
-   return v.progress;
+   return progress_ever;
 }

From 3daa2d97eb13f41de4cbab9301a167be85d48642 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 25 Jul 2011 15:39:03 -0700
Subject: [PATCH 091/600] i965/fs: Fix MRT drawing since the m0->m2 move for
 shader debug.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 1d89b8f1d11..eecfc92eb5b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -59,7 +59,8 @@ fs_visitor::generate_fb_write(fs_inst *inst)
 
 	 if (inst->target > 0) {
 	    /* Set the render target index for choosing BLEND_STATE. */
-	    brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2),
+	    brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+					   inst->base_mrf, 2),
 			      BRW_REGISTER_TYPE_UD),
 		    brw_imm_ud(inst->target));
 	 }

From 09916e877fc14723d7950f892e181df9f7d7f36f Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Fri, 22 Jul 2011 15:25:55 -0700
Subject: [PATCH 092/600] mesa: Add utility function to get base format from a
 GL compressed format

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/main/texcompress.c | 88 +++++++++++++++++++++++++++++++++++++
 src/mesa/main/texcompress.h |  3 ++
 2 files changed, 91 insertions(+)

diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index d820ae92747..040be943e82 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -39,6 +39,94 @@
 #include "texcompress.h"
 
 
+/**
+ * Get the GL base format of a specified GL compressed texture format
+ *
+ * From page 232 of the OpenGL 3.3 (Compatiblity Profile) spec:
+ *
+ *     "Compressed Internal Format      Base Internal Format    Type
+ *     ---------------------------     --------------------    ---------
+ *     COMPRESSED_ALPHA                ALPHA                   Generic
+ *     COMPRESSED_LUMINANCE            LUMINANCE               Generic
+ *     COMPRESSED_LUMINANCE_ALPHA      LUMINANCE_ALPHA         Generic
+ *     COMPRESSED_INTENSITY            INTENSITY               Generic
+ *     COMPRESSED_RED                  RED                     Generic
+ *     COMPRESSED_RG                   RG                      Generic
+ *     COMPRESSED_RGB                  RGB                     Generic
+ *     COMPRESSED_RGBA                 RGBA                    Generic
+ *     COMPRESSED_SRGB                 RGB                     Generic
+ *     COMPRESSED_SRGB_ALPHA           RGBA                    Generic
+ *     COMPRESSED_SLUMINANCE           LUMINANCE               Generic
+ *     COMPRESSED_SLUMINANCE_ALPHA     LUMINANCE_ALPHA         Generic
+ *     COMPRESSED_RED_RGTC1            RED                     Specific
+ *     COMPRESSED_SIGNED_RED_RGTC1     RED                     Specific
+ *     COMPRESSED_RG_RGTC2             RG                      Specific
+ *     COMPRESSED_SIGNED_RG_RGTC2      RG                      Specific"
+ *
+ * \return
+ * The base format of \c format if \c format is a compressed format (either
+ * generic or specific.  Otherwise 0 is returned.
+ */
+GLenum
+_mesa_gl_compressed_format_base_format(GLenum format)
+{
+   switch (format) {
+   case GL_COMPRESSED_RED:
+   case GL_COMPRESSED_RED_RGTC1:
+   case GL_COMPRESSED_SIGNED_RED_RGTC1:
+      return GL_RED;
+
+   case GL_COMPRESSED_RG:
+   case GL_COMPRESSED_RG_RGTC2:
+   case GL_COMPRESSED_SIGNED_RG_RGTC2:
+      return GL_RG;
+
+   case GL_COMPRESSED_RGB:
+   case GL_COMPRESSED_SRGB:
+   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+   case GL_COMPRESSED_RGB_FXT1_3DFX:
+   case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
+      return GL_RGB;
+
+   case GL_COMPRESSED_RGBA:
+   case GL_COMPRESSED_SRGB_ALPHA:
+   case GL_COMPRESSED_RGBA_BPTC_UNORM_ARB:
+   case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB:
+   case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB:
+   case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_ARB:
+   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+   case GL_COMPRESSED_RGBA_FXT1_3DFX:
+   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
+   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
+   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
+      return GL_RGBA;
+
+   case GL_COMPRESSED_ALPHA:
+      return GL_ALPHA;
+
+   case GL_COMPRESSED_LUMINANCE:
+   case GL_COMPRESSED_SLUMINANCE:
+   case GL_COMPRESSED_LUMINANCE_LATC1_EXT:
+   case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT:
+      return GL_LUMINANCE;
+
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+   case GL_COMPRESSED_SLUMINANCE_ALPHA:
+   case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT:
+   case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT:
+   case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI:
+      return GL_LUMINANCE_ALPHA;
+
+   case GL_COMPRESSED_INTENSITY:
+      return GL_INTENSITY;
+
+   default:
+      return 0;
+   }
+}
+
 /**
  * Return list of (and count of) all specific texture compression
  * formats that are supported.
diff --git a/src/mesa/main/texcompress.h b/src/mesa/main/texcompress.h
index 19b08bbadf6..922da00912d 100644
--- a/src/mesa/main/texcompress.h
+++ b/src/mesa/main/texcompress.h
@@ -33,6 +33,9 @@ struct gl_context;
 
 #if _HAVE_FULL_GL
 
+extern GLenum
+_mesa_gl_compressed_format_base_format(GLenum format);
+
 extern GLuint
 _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean all);
 

From 143b65f7612c255f29d08392192098b1c2bf4b62 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Fri, 22 Jul 2011 15:26:24 -0700
Subject: [PATCH 093/600] mesa: Return the correct internal fmt when a generic
 compressed fmt was used

If an application requests a generic compressed format for a texture
and the driver does not pick a specific compressed format, return the
generic base format (e.g., GL_RGBA) for the GL_TEXTURE_INTERNAL_FORMAT
query.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=3165
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/main/texparam.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index 4b9dcb5d3b5..c4ec29533e2 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -915,9 +915,23 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level,
             *params = _mesa_compressed_format_to_glenum(ctx, texFormat);
          }
          else {
-            /* return the user's requested internal format */
-            *params = img->InternalFormat;
-         }
+	    /* If the true internal format is not compressed but the user
+	     * requested a generic compressed format, we have to return the
+	     * generic base format that matches.
+	     *
+	     * From page 119 (page 129 of the PDF) of the OpenGL 1.3 spec:
+	     *
+	     *     "If no specific compressed format is available,
+	     *     internalformat is instead replaced by the corresponding base
+	     *     internal format."
+	     *
+	     * Otherwise just return the user's requested internal format
+	     */
+	    const GLenum f =
+	       _mesa_gl_compressed_format_base_format(img->InternalFormat);
+
+	    *params = (f != 0) ? f : img->InternalFormat;
+	 }
          break;
       case GL_TEXTURE_BORDER:
          *params = img->Border;

From b189d1635d89cd7d900e8f9a5eed88d7dc0b46cb Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Fri, 22 Jul 2011 16:45:50 -0700
Subject: [PATCH 094/600] mesa: Make _mesa_get_compressed_formats match the
 texture compression specs

The implementation deviated slightly from the GL_EXT_texture_sRGB spec
and from other implementations.  A giant comment block was added to
justify the somewhat odd behavior of this function.

In addition, the interface had unnecessary cruft.  The 'all' parameter
was false at all callers, so it has been removed.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/main/get.c         |   4 +-
 src/mesa/main/texcompress.c | 117 +++++++++++++++++++++++++++---------
 src/mesa/main/texcompress.h |   2 +-
 3 files changed, 93 insertions(+), 30 deletions(-)

diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 0492e1585c3..d32c68a53a4 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -1569,11 +1569,11 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu
       break;
 
    case GL_NUM_COMPRESSED_TEXTURE_FORMATS_ARB:
-      v->value_int = _mesa_get_compressed_formats(ctx, NULL, GL_FALSE);
+      v->value_int = _mesa_get_compressed_formats(ctx, NULL);
       break;
    case GL_COMPRESSED_TEXTURE_FORMATS_ARB:
       v->value_int_n.n = 
-	 _mesa_get_compressed_formats(ctx, v->value_int_n.ints, GL_FALSE);
+	 _mesa_get_compressed_formats(ctx, v->value_int_n.ints);
       ASSERT(v->value_int_n.n <= 100);
       break;
 
diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index 040be943e82..42bd1eee5ca 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -131,16 +131,101 @@ _mesa_gl_compressed_format_base_format(GLenum format)
  * Return list of (and count of) all specific texture compression
  * formats that are supported.
  *
+ * Some formats are \b not returned by this function.  The
+ * \c GL_COMPRESSED_TEXTURE_FORMATS query only returns formats that are
+ * "suitable for general-purpose usage."  All texture compression extensions
+ * have taken this to mean either linear RGB or linear RGBA.
+ *
+ * The GL_ARB_texture_compress_rgtc spec says:
+ *
+ *    "19) Should the GL_NUM_COMPRESSED_TEXTURE_FORMATS and
+ *        GL_COMPRESSED_TEXTURE_FORMATS queries return the RGTC formats?
+ *
+ *        RESOLVED:  No.
+ *
+ *        The OpenGL 2.1 specification says "The only values returned
+ *        by this query [GL_COMPRESSED_TEXTURE_FORMATS"] are those
+ *        corresponding to formats suitable for general-purpose usage.
+ *        The renderer will not enumerate formats with restrictions that
+ *        need to be specifically understood prior to use."
+ *
+ *        Compressed textures with just red or red-green components are
+ *        not general-purpose so should not be returned by these queries
+ *        because they have restrictions.
+ *
+ *        Applications that seek to use the RGTC formats should do so
+ *        by looking for this extension's name in the string returned by
+ *        glGetString(GL_EXTENSIONS) rather than
+ *        what GL_NUM_COMPRESSED_TEXTURE_FORMATS and
+ *        GL_COMPRESSED_TEXTURE_FORMATS return."
+ *
+ * There is nearly identical wording in the GL_EXT_texture_compression_rgtc
+ * spec.
+ *
+ * The GL_EXT_texture_rRGB spec says:
+ *
+ *    "22) Should the new COMPRESSED_SRGB_* formats be listed in an
+ *        implementation's GL_COMPRESSED_TEXTURE_FORMATS list?
+ *
+ *        RESOLVED:  No.  Section 3.8.1 says formats listed by
+ *        GL_COMPRESSED_TEXTURE_FORMATS are "suitable for general-purpose
+ *        usage."  The non-linear distribution of red, green, and
+ *        blue for these sRGB compressed formats makes them not really
+ *        general-purpose."
+ *
+ * The GL_EXT_texture_compression_latc spec says:
+ *
+ *    "16) Should the GL_NUM_COMPRESSED_TEXTURE_FORMATS and
+ *        GL_COMPRESSED_TEXTURE_FORMATS queries return the LATC formats?
+ *
+ *        RESOLVED:  No.
+ *
+ *        The OpenGL 2.1 specification says "The only values returned
+ *        by this query [GL_COMPRESSED_TEXTURE_FORMATS"] are those
+ *        corresponding to formats suitable for general-purpose usage.
+ *        The renderer will not enumerate formats with restrictions that
+ *        need to be specifically understood prior to use."
+ *
+ *        Historically, OpenGL implementation have advertised the RGB and
+ *        RGBA versions of the S3TC extensions compressed format tokens
+ *        through this mechanism.
+ *
+ *        The specification is not sufficiently clear about what "suitable
+ *        for general-purpose usage" means.  Historically that seems to mean
+ *        unsigned RGB or unsigned RGBA.  The DXT1 format supporting alpha
+ *        (GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) is not exposed in the list (at
+ *        least for NVIDIA drivers) because the alpha is always 1.0 expect
+ *        when it is 0.0 when RGB is required to be black.  NVIDIA's even
+ *        limits itself to true linear RGB or RGBA formats, specifically
+ *        not including EXT_texture_sRGB's sRGB S3TC compressed formats.
+ *
+ *        Adding luminance and luminance-alpha texture formats (and
+ *        certainly signed versions of luminance and luminance-alpha
+ *        formats!) invites potential comptaibility problems with old
+ *        applications using this mechanism since old applications are
+ *        unlikely to expect non-RGB or non-RGBA formats to be advertised
+ *        through this mechanism.  However no specific misinteractions
+ *        with old applications is known.
+ *
+ *        Applications that seek to use the LATC formats should do so
+ *        by looking for this extension's name in the string returned by
+ *        glGetString(GL_EXTENSIONS) rather than
+ *        what GL_NUM_COMPRESSED_TEXTURE_FORMATS and
+ *        GL_COMPRESSED_TEXTURE_FORMATS return."
+ *
+ * There is no formal spec for GL_ATI_texture_compression_3dc.  Since the
+ * formats added by this extension are luminance-alpha formats, it is
+ * reasonable to expect them to follow the same rules as
+ * GL_EXT_texture_compression_latc.  At the very least, Catalyst 11.6 does not
+ * expose the 3dc formats through this mechanism.
+ *
  * \param ctx  the GL context
  * \param formats  the resulting format list (may be NULL).
- * \param all  if true return all formats, even those with  some kind
- *             of restrictions/limitations (See GL_ARB_texture_compression
- *             spec for more info).
  *
  * \return number of formats.
  */
 GLuint
-_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean all)
+_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats)
 {
    GLuint n = 0;
    if (ctx->Extensions.TDFX_texture_compression_FXT1) {
@@ -152,24 +237,15 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean a
          n += 2;
       }
    }
-   /* don't return RGTC - ARB_texture_compression_rgtc query 19 */
+
    if (ctx->Extensions.EXT_texture_compression_s3tc) {
       if (formats) {
          formats[n++] = GL_COMPRESSED_RGB_S3TC_DXT1_EXT;
-         /* This format has some restrictions/limitations and so should
-          * not be returned via the GL_COMPRESSED_TEXTURE_FORMATS query.
-          * Specifically, all transparent pixels become black.  NVIDIA
-          * omits this format too.
-          */
-         if (all)
-             formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
          formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
          formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
       }
       else {
          n += 3;
-         if (all)
-             n += 1;
       }
    }
    if (ctx->Extensions.S3_s3tc) {
@@ -183,19 +259,6 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean a
          n += 4;
       }
    }
-#if FEATURE_EXT_texture_sRGB
-   if (ctx->Extensions.EXT_texture_sRGB) {
-      if (formats) {
-         formats[n++] = GL_COMPRESSED_SRGB_S3TC_DXT1_EXT;
-         formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT;
-         formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT;
-         formats[n++] = GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT;
-      }
-      else {
-         n += 4;
-      }
-   }
-#endif /* FEATURE_EXT_texture_sRGB */
    return n;
 
 #if FEATURE_ES1 || FEATURE_ES2
diff --git a/src/mesa/main/texcompress.h b/src/mesa/main/texcompress.h
index 922da00912d..375cf90c8a2 100644
--- a/src/mesa/main/texcompress.h
+++ b/src/mesa/main/texcompress.h
@@ -37,7 +37,7 @@ extern GLenum
 _mesa_gl_compressed_format_base_format(GLenum format);
 
 extern GLuint
-_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats, GLboolean all);
+_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats);
 
 extern gl_format
 _mesa_glenum_to_compressed_format(GLenum format);

From 95739f19ccc8d3915c437238ca057ddbecd193c6 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Mon, 25 Jul 2011 13:30:17 -0500
Subject: [PATCH 095/600] st/mesa: respect force_s3tc_enable environment
 variable

NOTE: This is a candidate for the 7.10 and 7.11 branches.
---
 src/mesa/state_tracker/st_extensions.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 99b231d9706..b5f6d356eb0 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -208,6 +208,15 @@ void st_init_limits(struct st_context *st)
 }
 
 
+static GLboolean st_get_s3tc_override(void)
+{
+   const char *override = _mesa_getenv("force_s3tc_enable");
+   if (override && !strcmp(override, "true"))
+      return GL_TRUE;
+   return GL_FALSE;
+}
+
+
 /**
  * Use pipe_screen::get_param() to query PIPE_CAP_ values to determine
  * which GL extensions are supported.
@@ -426,7 +435,7 @@ void st_init_extensions(struct st_context *st)
    if (screen->is_format_supported(screen, PIPE_FORMAT_DXT5_RGBA,
                                    PIPE_TEXTURE_2D, 0,
                                    PIPE_BIND_SAMPLER_VIEW) &&
-       ctx->Mesa_DXTn) {
+       (ctx->Mesa_DXTn || st_get_s3tc_override())) {
       ctx->Extensions.EXT_texture_compression_s3tc = GL_TRUE;
       ctx->Extensions.S3_s3tc = GL_TRUE;
    }

From 860c51d82711936d343b55aafb46befc8c032fe6 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Wed, 20 Jul 2011 17:35:22 -0500
Subject: [PATCH 096/600] util: enable S3TC support when the force_s3tc_enable
 env var is set to "true"

NOTE: This is a candidate for the 7.10 and 7.11 branches.
---
 src/gallium/auxiliary/util/u_format_s3tc.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_format_s3tc.c b/src/gallium/auxiliary/util/u_format_s3tc.c
index bb989c29d81..d8a7c0d453f 100644
--- a/src/gallium/auxiliary/util/u_format_s3tc.c
+++ b/src/gallium/auxiliary/util/u_format_s3tc.c
@@ -119,8 +119,15 @@ util_format_s3tc_init(void)
 
    library = util_dl_open(DXTN_LIBNAME);
    if (!library) {
-      debug_printf("couldn't open " DXTN_LIBNAME ", software DXTn "
-         "compression/decompression unavailable\n");
+      if (getenv("force_s3tc_enable") &&
+          !strcmp(getenv("force_s3tc_enable"), "true")) {
+         debug_printf("couldn't open " DXTN_LIBNAME ", enabling DXTn due to "
+            "force_s3tc_enable=true environment variable\n");
+         util_format_s3tc_enabled = TRUE;
+      } else {
+         debug_printf("couldn't open " DXTN_LIBNAME ", software DXTn "
+            "compression/decompression unavailable\n");
+      }
       return;
    }
 

From 1c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Tue, 26 Jul 2011 21:15:05 +0200
Subject: [PATCH 097/600] r300g: copy the compiler from r300c

What a beast.

r300g doesn't depend on files from r300c anymore, so r300c is now left
to its own fate. BTW 'make test' can be invoked from the gallium/r300
directory to run some compiler unit tests.
---
 src/gallium/drivers/r300/Makefile             |   54 +-
 src/gallium/drivers/r300/SConscript           |   42 +-
 .../drivers/r300/compiler/memory_pool.c       |   97 ++
 .../drivers/r300/compiler/memory_pool.h       |   80 ++
 .../drivers/r300/compiler/r300_fragprog.c     |  338 +++++
 .../drivers/r300/compiler/r300_fragprog.h     |   44 +
 .../r300/compiler/r300_fragprog_emit.c        |  536 ++++++++
 .../r300/compiler/r300_fragprog_swizzle.c     |  243 ++++
 .../r300/compiler/r300_fragprog_swizzle.h     |   39 +
 .../drivers/r300/compiler/r3xx_fragprog.c     |  172 +++
 .../drivers/r300/compiler/r3xx_vertprog.c     | 1045 +++++++++++++++
 .../r300/compiler/r3xx_vertprog_dump.c        |  207 +++
 .../drivers/r300/compiler/r500_fragprog.c     |  539 ++++++++
 .../drivers/r300/compiler/r500_fragprog.h     |   50 +
 .../r300/compiler/r500_fragprog_emit.c        |  678 ++++++++++
 .../drivers/r300/compiler/radeon_code.c       |  187 +++
 .../drivers/r300/compiler/radeon_code.h       |  306 +++++
 .../drivers/r300/compiler/radeon_compiler.c   |  489 +++++++
 .../drivers/r300/compiler/radeon_compiler.h   |  171 +++
 .../r300/compiler/radeon_compiler_util.c      |  701 ++++++++++
 .../r300/compiler/radeon_compiler_util.h      |   89 ++
 .../drivers/r300/compiler/radeon_dataflow.c   |  892 +++++++++++++
 .../drivers/r300/compiler/radeon_dataflow.h   |  134 ++
 .../r300/compiler/radeon_dataflow_deadcode.c  |  359 +++++
 .../r300/compiler/radeon_dataflow_swizzles.c  |  103 ++
 .../r300/compiler/radeon_emulate_branches.c   |  342 +++++
 .../r300/compiler/radeon_emulate_branches.h   |   30 +
 .../r300/compiler/radeon_emulate_loops.c      |  522 ++++++++
 .../r300/compiler/radeon_emulate_loops.h      |   32 +
 .../drivers/r300/compiler/radeon_list.c       |   90 ++
 .../drivers/r300/compiler/radeon_list.h       |   46 +
 .../drivers/r300/compiler/radeon_opcodes.c    |  546 ++++++++
 .../drivers/r300/compiler/radeon_opcodes.h    |  263 ++++
 .../drivers/r300/compiler/radeon_optimize.c   |  700 ++++++++++
 .../r300/compiler/radeon_pair_dead_sources.c  |   62 +
 .../r300/compiler/radeon_pair_regalloc.c      |  706 ++++++++++
 .../r300/compiler/radeon_pair_schedule.c      | 1010 +++++++++++++++
 .../r300/compiler/radeon_pair_translate.c     |  359 +++++
 .../drivers/r300/compiler/radeon_program.c    |  225 ++++
 .../drivers/r300/compiler/radeon_program.h    |  206 +++
 .../r300/compiler/radeon_program_alu.c        | 1154 +++++++++++++++++
 .../r300/compiler/radeon_program_alu.h        |   66 +
 .../r300/compiler/radeon_program_constants.h  |  190 +++
 .../r300/compiler/radeon_program_pair.c       |  239 ++++
 .../r300/compiler/radeon_program_pair.h       |  137 ++
 .../r300/compiler/radeon_program_print.c      |  418 ++++++
 .../r300/compiler/radeon_program_tex.c        |  528 ++++++++
 .../r300/compiler/radeon_program_tex.h        |   39 +
 .../r300/compiler/radeon_remove_constants.c   |  150 +++
 .../r300/compiler/radeon_remove_constants.h   |   35 +
 .../r300/compiler/radeon_rename_regs.c        |   92 ++
 .../r300/compiler/radeon_rename_regs.h        |    9 +
 .../drivers/r300/compiler/radeon_swizzle.h    |   57 +
 .../drivers/r300/compiler/radeon_variable.c   |  517 ++++++++
 .../drivers/r300/compiler/radeon_variable.h   |   89 ++
 .../drivers/r300/compiler/tests/.gitignore    |    1 +
 .../drivers/r300/compiler/tests/Makefile      |   53 +
 .../tests/radeon_compiler_util_tests.c        |   76 ++
 .../r300/compiler/tests/rc_test_helpers.c     |  380 ++++++
 .../r300/compiler/tests/rc_test_helpers.h     |   13 +
 .../drivers/r300/compiler/tests/unit_test.c   |   35 +
 .../drivers/r300/compiler/tests/unit_test.h   |   17 +
 src/gallium/drivers/r300/r300_emit.h          |    1 -
 src/gallium/drivers/r300/r300_fs.c            |    3 +-
 src/gallium/drivers/r300/r300_fs.h            |    2 +-
 src/gallium/drivers/r300/r300_reg.h           |   21 +-
 src/gallium/drivers/r300/r300_tgsi_to_rc.c    |    3 +-
 src/gallium/drivers/r300/r300_vs.c            |    2 +-
 src/gallium/drivers/r300/r300_vs.h            |    2 +-
 69 files changed, 17038 insertions(+), 25 deletions(-)
 create mode 100644 src/gallium/drivers/r300/compiler/memory_pool.c
 create mode 100644 src/gallium/drivers/r300/compiler/memory_pool.h
 create mode 100644 src/gallium/drivers/r300/compiler/r300_fragprog.c
 create mode 100644 src/gallium/drivers/r300/compiler/r300_fragprog.h
 create mode 100644 src/gallium/drivers/r300/compiler/r300_fragprog_emit.c
 create mode 100644 src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c
 create mode 100644 src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h
 create mode 100644 src/gallium/drivers/r300/compiler/r3xx_fragprog.c
 create mode 100644 src/gallium/drivers/r300/compiler/r3xx_vertprog.c
 create mode 100644 src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c
 create mode 100644 src/gallium/drivers/r300/compiler/r500_fragprog.c
 create mode 100644 src/gallium/drivers/r300/compiler/r500_fragprog.h
 create mode 100644 src/gallium/drivers/r300/compiler/r500_fragprog_emit.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_code.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_code.h
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_compiler.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_compiler.h
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_compiler_util.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_compiler_util.h
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_dataflow.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_dataflow.h
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_emulate_branches.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_emulate_branches.h
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_emulate_loops.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_emulate_loops.h
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_list.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_list.h
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_opcodes.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_opcodes.h
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_optimize.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_pair_schedule.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_pair_translate.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_program.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_program.h
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_program_alu.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_program_alu.h
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_program_constants.h
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_program_pair.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_program_pair.h
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_program_print.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_program_tex.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_program_tex.h
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_remove_constants.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_remove_constants.h
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_rename_regs.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_rename_regs.h
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_swizzle.h
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_variable.c
 create mode 100644 src/gallium/drivers/r300/compiler/radeon_variable.h
 create mode 100644 src/gallium/drivers/r300/compiler/tests/.gitignore
 create mode 100644 src/gallium/drivers/r300/compiler/tests/Makefile
 create mode 100644 src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c
 create mode 100644 src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c
 create mode 100644 src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h
 create mode 100644 src/gallium/drivers/r300/compiler/tests/unit_test.c
 create mode 100644 src/gallium/drivers/r300/compiler/tests/unit_test.h

diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile
index 4088216adcb..4f021276a8f 100644
--- a/src/gallium/drivers/r300/Makefile
+++ b/src/gallium/drivers/r300/Makefile
@@ -26,19 +26,51 @@ C_SOURCES = \
 	r300_texture.c \
 	r300_texture_desc.c \
 	r300_tgsi_to_rc.c \
-	r300_transfer.c
+	r300_transfer.c \
+	\
+	compiler/radeon_code.c \
+	compiler/radeon_compiler.c \
+	compiler/radeon_compiler_util.c \
+	compiler/radeon_emulate_branches.c \
+	compiler/radeon_emulate_loops.c \
+	compiler/radeon_program.c \
+	compiler/radeon_program_print.c \
+	compiler/radeon_opcodes.c \
+	compiler/radeon_program_alu.c \
+	compiler/radeon_program_pair.c \
+	compiler/radeon_program_tex.c \
+	compiler/radeon_pair_translate.c \
+	compiler/radeon_pair_schedule.c \
+	compiler/radeon_pair_regalloc.c \
+	compiler/radeon_pair_dead_sources.c \
+	compiler/radeon_dataflow.c \
+	compiler/radeon_dataflow_deadcode.c \
+	compiler/radeon_dataflow_swizzles.c \
+	compiler/radeon_list.c \
+	compiler/radeon_optimize.c \
+	compiler/radeon_remove_constants.c \
+	compiler/radeon_rename_regs.c \
+	compiler/radeon_variable.c \
+	compiler/r3xx_fragprog.c \
+	compiler/r300_fragprog.c \
+	compiler/r300_fragprog_swizzle.c \
+	compiler/r300_fragprog_emit.c \
+	compiler/r500_fragprog.c \
+	compiler/r500_fragprog_emit.c \
+	compiler/r3xx_vertprog.c \
+	compiler/r3xx_vertprog_dump.c \
+	compiler/memory_pool.c \
+	\
+	$(TOP)/src/glsl/ralloc.c \
+	$(TOP)/src/mesa/program/register_allocate.c
+
 
 LIBRARY_INCLUDES = \
-	-I$(TOP)/src/mesa/drivers/dri/r300/compiler \
-	-I$(TOP)/include
-
-COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a
-
-EXTRA_OBJECTS = \
-	$(COMPILER_ARCHIVE)
+	-I$(TOP)/include \
+	-I$(TOP)/src/mesa \
+	-I$(TOP)/src/glsl
 
 include ../../Makefile.template
 
-.PHONY: $(COMPILER_ARCHIVE)
-$(COMPILER_ARCHIVE):
-	$(MAKE) -C $(TOP)/src/mesa/drivers/dri/r300/compiler
+test: default
+	@$(MAKE) -s -C compiler/tests/
diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript
index 3af157a7956..7ffd1c27c96 100644
--- a/src/gallium/drivers/r300/SConscript
+++ b/src/gallium/drivers/r300/SConscript
@@ -1,13 +1,11 @@
 Import('*')
 
-r300compiler = SConscript('#/src/mesa/drivers/dri/r300/compiler/SConscript')
-
 env = env.Clone()
-# add the paths for r300compiler
 env.Append(CPPPATH = [
-    '#/src/mesa/drivers/dri/r300/compiler', 
     '#/include', 
     '#/src/mesa',
+    '#/src/glsl',
+    '#/src/mapi',
 ])
 
 r300 = env.ConvenienceLibrary(
@@ -36,7 +34,41 @@ r300 = env.ConvenienceLibrary(
         'r300_texture_desc.c',
         'r300_tgsi_to_rc.c',
         'r300_transfer.c',
-    ] + r300compiler) + r300compiler
+        'compiler/radeon_code.c',
+        'compiler/radeon_compiler.c',
+        'compiler/radeon_compiler_util.c',
+        'compiler/radeon_program.c',
+        'compiler/radeon_program_print.c',
+        'compiler/radeon_opcodes.c',
+        'compiler/radeon_program_alu.c',
+        'compiler/radeon_program_pair.c',
+        'compiler/radeon_program_tex.c',
+        'compiler/radeon_pair_translate.c',
+        'compiler/radeon_pair_schedule.c',
+        'compiler/radeon_pair_regalloc.c',
+        'compiler/radeon_pair_dead_sources.c',
+        'compiler/radeon_optimize.c',
+        'compiler/radeon_remove_constants.c',
+        'compiler/radeon_rename_regs.c',
+        'compiler/radeon_emulate_branches.c',
+        'compiler/radeon_emulate_loops.c',
+        'compiler/radeon_dataflow.c',
+        'compiler/radeon_dataflow_deadcode.c',
+        'compiler/radeon_dataflow_swizzles.c',
+        'compiler/radeon_variable.c',
+        'compiler/radeon_list.c',
+        'compiler/r3xx_fragprog.c',
+        'compiler/r300_fragprog.c',
+        'compiler/r300_fragprog_swizzle.c',
+        'compiler/r300_fragprog_emit.c',
+        'compiler/r500_fragprog.c',
+        'compiler/r500_fragprog_emit.c',
+        'compiler/r3xx_vertprog.c',
+        'compiler/r3xx_vertprog_dump.c',
+        'compiler/memory_pool.c',
+        '#/src/glsl/ralloc.c',
+        '#/src/mesa/program/register_allocate.c'
+    ])
 
 env.Alias('r300', r300)
 
diff --git a/src/gallium/drivers/r300/compiler/memory_pool.c b/src/gallium/drivers/r300/compiler/memory_pool.c
new file mode 100644
index 00000000000..ddcdddf9e3c
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/memory_pool.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "memory_pool.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+#define POOL_LARGE_ALLOC 4096
+#define POOL_ALIGN 8
+
+
+struct memory_block {
+	struct memory_block * next;
+};
+
+void memory_pool_init(struct memory_pool * pool)
+{
+	memset(pool, 0, sizeof(struct memory_pool));
+}
+
+
+void memory_pool_destroy(struct memory_pool * pool)
+{
+	while(pool->blocks) {
+		struct memory_block * block = pool->blocks;
+		pool->blocks = block->next;
+		free(block);
+	}
+}
+
+static void refill_pool(struct memory_pool * pool)
+{
+	unsigned int blocksize = pool->total_allocated;
+	struct memory_block * newblock;
+
+	if (!blocksize)
+		blocksize = 2*POOL_LARGE_ALLOC;
+
+	newblock = (struct memory_block*)malloc(blocksize);
+	newblock->next = pool->blocks;
+	pool->blocks = newblock;
+
+	pool->head = (unsigned char*)(newblock + 1);
+	pool->end = ((unsigned char*)newblock) + blocksize;
+	pool->total_allocated += blocksize;
+}
+
+
+void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes)
+{
+	if (bytes < POOL_LARGE_ALLOC) {
+		void * ptr;
+
+		if (pool->head + bytes > pool->end)
+			refill_pool(pool);
+
+		assert(pool->head + bytes <= pool->end);
+
+		ptr = pool->head;
+
+		pool->head += bytes;
+		pool->head = (unsigned char*)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1));
+
+		return ptr;
+	} else {
+		struct memory_block * block = (struct memory_block*)malloc(bytes + sizeof(struct memory_block));
+
+		block->next = pool->blocks;
+		pool->blocks = block;
+
+		return (block + 1);
+	}
+}
+
+
diff --git a/src/gallium/drivers/r300/compiler/memory_pool.h b/src/gallium/drivers/r300/compiler/memory_pool.h
new file mode 100644
index 00000000000..42344d0e3ba
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/memory_pool.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef MEMORY_POOL_H
+#define MEMORY_POOL_H
+
+struct memory_block;
+
+/**
+ * Provides a pool of memory that can quickly be allocated from, at the
+ * cost of being unable to explicitly free one of the allocated blocks.
+ * Instead, the entire pool can be freed at once.
+ *
+ * The idea is to allow one to quickly allocate a flexible amount of
+ * memory during operations like shader compilation while avoiding
+ * reference counting headaches.
+ */
+struct memory_pool {
+	unsigned char * head;
+	unsigned char * end;
+	unsigned int total_allocated;
+	struct memory_block * blocks;
+};
+
+
+void memory_pool_init(struct memory_pool * pool);
+void memory_pool_destroy(struct memory_pool * pool);
+void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes);
+
+
+/**
+ * Generic helper for growing an array that has separate size/count
+ * and reserved counters to accomodate up to num new element.
+ *
+ *  type * Array;
+ *  unsigned int Size;
+ *  unsigned int Reserved;
+ *
+ * memory_pool_array_reserve(pool, type, Array, Size, Reserved, k);
+ * assert(Size + k < Reserved);
+ *
+ * \note Size is not changed by this macro.
+ *
+ * \warning Array, Size, Reserved have to be lvalues and may be evaluated
+ * several times.
+ */
+#define memory_pool_array_reserve(pool, type, array, size, reserved, num) do { \
+	unsigned int _num = (num); \
+	if ((size) + _num > (reserved)) { \
+		unsigned int newreserve = (reserved) * 2; \
+		type * newarray; \
+		if (newreserve < _num) \
+			newreserve = 4 * _num; /* arbitrary heuristic */ \
+		newarray = memory_pool_malloc((pool), newreserve * sizeof(type)); \
+		memcpy(newarray, (array), (size) * sizeof(type)); \
+		(array) = newarray; \
+		(reserved) = newreserve; \
+	} \
+} while(0)
+
+#endif /* MEMORY_POOL_H */
diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog.c b/src/gallium/drivers/r300/compiler/r300_fragprog.c
new file mode 100644
index 00000000000..deba9ca834d
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r300_fragprog.c
@@ -0,0 +1,338 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "r300_fragprog.h"
+
+#include <stdio.h>
+
+#include "../r300_reg.h"
+
+static void presub_string(char out[10], unsigned int inst)
+{
+	switch(inst & 0x600000){
+	case R300_ALU_SRCP_1_MINUS_2_SRC0:
+		sprintf(out, "bias");
+		break;
+	case R300_ALU_SRCP_SRC1_MINUS_SRC0:
+		sprintf(out, "sub");
+		break;
+	case R300_ALU_SRCP_SRC1_PLUS_SRC0:
+		sprintf(out, "add");
+		break;
+	case R300_ALU_SRCP_1_MINUS_SRC0:
+		sprintf(out, "inv ");
+		break;
+	}
+}
+
+static int get_msb(unsigned int bit, unsigned int r400_ext_addr)
+{
+	return (r400_ext_addr & bit) ? 1 << 5 : 0;
+}
+
+/* just some random things... */
+void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
+{
+	struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
+	struct r300_fragment_program_code *code = &compiler->code->code.r300;
+	int n, i, j;
+	static int pc = 0;
+
+	fprintf(stderr, "pc=%d*************************************\n", pc++);
+
+	fprintf(stderr, "Hardware program\n");
+	fprintf(stderr, "----------------\n");
+	if (c->is_r400) {
+		fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext);
+	}
+
+	for (n = 0; n <= (code->config & 3); n++) {
+		uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n];
+		unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) +
+				(((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6);
+		unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) +
+				(((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6);
+		int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT;
+		int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT;
+
+		fprintf(stderr, "NODE %d: alu_offset: %u, tex_offset: %d, "
+			"alu_end: %u, tex_end: %d  (code_addr: %08x)\n", n,
+			alu_offset, tex_offset, alu_end, tex_end, code_addr);
+
+		if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) {
+			fprintf(stderr, "  TEX:\n");
+			for (i = tex_offset;
+			     i <= tex_offset + tex_end;
+			     ++i) {
+				const char *instr;
+
+				switch ((code->tex.
+					 inst[i] >> R300_TEX_INST_SHIFT) &
+					15) {
+				case R300_TEX_OP_LD:
+					instr = "TEX";
+					break;
+				case R300_TEX_OP_KIL:
+					instr = "KIL";
+					break;
+				case R300_TEX_OP_TXP:
+					instr = "TXP";
+					break;
+				case R300_TEX_OP_TXB:
+					instr = "TXB";
+					break;
+				default:
+					instr = "UNKNOWN";
+				}
+
+				fprintf(stderr,
+					"    %s t%i, %c%i, texture[%i]   (%08x)\n",
+					instr,
+					(code->tex.
+					 inst[i] >> R300_DST_ADDR_SHIFT) & 31,
+					't',
+					(code->tex.
+					 inst[i] >> R300_SRC_ADDR_SHIFT) & 31,
+					(code->tex.
+					 inst[i] & R300_TEX_ID_MASK) >>
+					R300_TEX_ID_SHIFT,
+					code->tex.inst[i]);
+			}
+		}
+
+		for (i = alu_offset;
+		     i <= alu_offset + alu_end; ++i) {
+			char srcc[4][10], dstc[20];
+			char srca[4][10], dsta[20];
+			char argc[3][20];
+			char arga[3][20];
+			char flags[5], tmp[10];
+
+			for (j = 0; j < 3; ++j) {
+				int regc = code->alu.inst[i].rgb_addr >> (j * 6);
+				int rega = code->alu.inst[i].alpha_addr >> (j * 6);
+				int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j),
+					code->alu.inst[i].r400_ext_addr);
+				int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j),
+					code->alu.inst[i].r400_ext_addr);
+
+				sprintf(srcc[j], "%c%i",
+					(regc & 32) ? 'c' : 't', (regc & 31) | msbc);
+				sprintf(srca[j], "%c%i",
+					(rega & 32) ? 'c' : 't', (rega & 31) | msba);
+			}
+
+			dstc[0] = 0;
+			sprintf(flags, "%s%s%s",
+				(code->alu.inst[i].
+				 rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "",
+				(code->alu.inst[i].
+				 rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "",
+				(code->alu.inst[i].
+				 rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : "");
+			if (flags[0] != 0) {
+				unsigned int msb = get_msb(
+					R400_ADDRD_EXT_RGB_MSB_BIT,
+					code->alu.inst[i].r400_ext_addr);
+
+				sprintf(dstc, "t%i.%s ",
+					((code->alu.inst[i].
+					 rgb_addr >> R300_ALU_DSTC_SHIFT)
+					 & 31) | msb,
+					flags);
+			}
+			sprintf(flags, "%s%s%s",
+				(code->alu.inst[i].
+				 rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
+				(code->alu.inst[i].
+				 rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
+				(code->alu.inst[i].
+				 rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
+			if (flags[0] != 0) {
+				sprintf(tmp, "o%i.%s",
+					(code->alu.inst[i].
+					 rgb_addr >> 29) & 3,
+					flags);
+				strcat(dstc, tmp);
+			}
+			/* Presub */
+			presub_string(srcc[3], code->alu.inst[i].rgb_inst);
+			presub_string(srca[3], code->alu.inst[i].alpha_inst);
+
+			dsta[0] = 0;
+			if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
+				unsigned int msb = get_msb(
+					R400_ADDRD_EXT_A_MSB_BIT,
+					code->alu.inst[i].r400_ext_addr);
+				sprintf(dsta, "t%i.w ",
+					((code->alu.inst[i].
+					 alpha_addr >> R300_ALU_DSTA_SHIFT) & 31)
+					 | msb);
+			}
+			if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) {
+				sprintf(tmp, "o%i.w ",
+					(code->alu.inst[i].
+					 alpha_addr >> 25) & 3);
+				strcat(dsta, tmp);
+			}
+			if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) {
+				strcat(dsta, "Z");
+			}
+
+			fprintf(stderr,
+				"%3i: xyz: %3s %3s %3s %5s-> %-20s (%08x)\n"
+				"       w: %3s %3s %3s %5s-> %-20s (%08x)\n", i,
+				srcc[0], srcc[1], srcc[2], srcc[3], dstc,
+				code->alu.inst[i].rgb_addr, srca[0], srca[1],
+				srca[2], srca[3], dsta,
+				code->alu.inst[i].alpha_addr);
+
+			for (j = 0; j < 3; ++j) {
+				int regc = code->alu.inst[i].rgb_inst >> (j * 7);
+				int rega = code->alu.inst[i].alpha_inst >> (j * 7);
+				int d;
+				char buf[20];
+
+				d = regc & 31;
+				if (d < 12) {
+					switch (d % 4) {
+					case R300_ALU_ARGC_SRC0C_XYZ:
+						sprintf(buf, "%s.xyz",
+							srcc[d / 4]);
+						break;
+					case R300_ALU_ARGC_SRC0C_XXX:
+						sprintf(buf, "%s.xxx",
+							srcc[d / 4]);
+						break;
+					case R300_ALU_ARGC_SRC0C_YYY:
+						sprintf(buf, "%s.yyy",
+							srcc[d / 4]);
+						break;
+					case R300_ALU_ARGC_SRC0C_ZZZ:
+						sprintf(buf, "%s.zzz",
+							srcc[d / 4]);
+						break;
+					}
+				} else if (d < 15) {
+					sprintf(buf, "%s.www", srca[d - 12]);
+				} else if (d < 20 ) {
+					switch(d) {
+					case R300_ALU_ARGC_SRCP_XYZ:
+						sprintf(buf, "srcp.xyz");
+						break;
+					case R300_ALU_ARGC_SRCP_XXX:
+						sprintf(buf, "srcp.xxx");
+						break;
+					case R300_ALU_ARGC_SRCP_YYY:
+						sprintf(buf, "srcp.yyy");
+						break;
+					case R300_ALU_ARGC_SRCP_ZZZ:
+						sprintf(buf, "srcp.zzz");
+						break;
+					case R300_ALU_ARGC_SRCP_WWW:
+						sprintf(buf, "srcp.www");
+						break;
+					}
+				} else if (d == 20) {
+					sprintf(buf, "0.0");
+				} else if (d == 21) {
+					sprintf(buf, "1.0");
+				} else if (d == 22) {
+					sprintf(buf, "0.5");
+				} else if (d >= 23 && d < 32) {
+					d -= 23;
+					switch (d / 3) {
+					case 0:
+						sprintf(buf, "%s.yzx",
+							srcc[d % 3]);
+						break;
+					case 1:
+						sprintf(buf, "%s.zxy",
+							srcc[d % 3]);
+						break;
+					case 2:
+						sprintf(buf, "%s.Wzy",
+							srcc[d % 3]);
+						break;
+					}
+				} else {
+					sprintf(buf, "%i", d);
+				}
+
+				sprintf(argc[j], "%s%s%s%s",
+					(regc & 32) ? "-" : "",
+					(regc & 64) ? "|" : "",
+					buf, (regc & 64) ? "|" : "");
+
+				d = rega & 31;
+				if (d < 9) {
+					sprintf(buf, "%s.%c", srcc[d / 3],
+						'x' + (char)(d % 3));
+				} else if (d < 12) {
+					sprintf(buf, "%s.w", srca[d - 9]);
+				} else if (d < 16) {
+					switch(d) {
+					case R300_ALU_ARGA_SRCP_X:
+						sprintf(buf, "srcp.x");
+						break;
+					case R300_ALU_ARGA_SRCP_Y:
+						sprintf(buf, "srcp.y");
+						break;
+					case R300_ALU_ARGA_SRCP_Z:
+						sprintf(buf, "srcp.z");
+						break;
+					case R300_ALU_ARGA_SRCP_W:
+						sprintf(buf, "srcp.w");
+						break;
+					}
+				} else if (d == 16) {
+					sprintf(buf, "0.0");
+				} else if (d == 17) {
+					sprintf(buf, "1.0");
+				} else if (d == 18) {
+					sprintf(buf, "0.5");
+				} else {
+					sprintf(buf, "%i", d);
+				}
+
+				sprintf(arga[j], "%s%s%s%s",
+					(rega & 32) ? "-" : "",
+					(rega & 64) ? "|" : "",
+					buf, (rega & 64) ? "|" : "");
+			}
+
+			fprintf(stderr, "     xyz: %8s %8s %8s    op: %08x %s\n"
+				"       w: %8s %8s %8s    op: %08x\n",
+				argc[0], argc[1], argc[2],
+				code->alu.inst[i].rgb_inst,
+				code->alu.inst[i].rgb_inst & R300_ALU_INSERT_NOP ?
+				"NOP" : "",
+				arga[0], arga[1],arga[2],
+				code->alu.inst[i].alpha_inst);
+		}
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog.h b/src/gallium/drivers/r300/compiler/r300_fragprog.h
new file mode 100644
index 00000000000..0c88bab2f33
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r300_fragprog.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ *   Ben Skeggs <darktama@iinet.net.au>
+ *   Jerome Glisse <j.glisse@gmail.com>
+ */
+#ifndef __R300_FRAGPROG_H_
+#define __R300_FRAGPROG_H_
+
+#include "radeon_compiler.h"
+#include "radeon_program.h"
+
+
+extern void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user);
+
+extern void r300FragmentProgramDump(struct radeon_compiler *c, void *user);
+
+#endif
diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c b/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c
new file mode 100644
index 00000000000..e6fd1fde62d
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c
@@ -0,0 +1,536 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ *
+ * Emit the r300_fragment_program_code that can be understood by the hardware.
+ * Input is a pre-transformed radeon_program.
+ *
+ * \author Ben Skeggs <darktama@iinet.net.au>
+ *
+ * \author Jerome Glisse <j.glisse@gmail.com>
+ */
+
+#include "r300_fragprog.h"
+
+#include "../r300_reg.h"
+
+#include "radeon_program_pair.h"
+#include "r300_fragprog_swizzle.h"
+
+
+struct r300_emit_state {
+	struct r300_fragment_program_compiler * compiler;
+
+	unsigned current_node : 2;
+	unsigned node_first_tex : 8;
+	unsigned node_first_alu : 8;
+	uint32_t node_flags;
+};
+
+#define PROG_CODE \
+	struct r300_fragment_program_compiler *c = emit->compiler; \
+	struct r300_fragment_program_code *code = &c->code->code.r300
+
+#define error(fmt, args...) do {			\
+		rc_error(&c->Base, "%s::%s(): " fmt "\n",	\
+			__FILE__, __FUNCTION__, ##args);	\
+	} while(0)
+
+static unsigned int get_msbs_alu(unsigned int bits)
+{
+	return (bits >> 6) & 0x7;
+}
+
+/**
+ * @param lsbs The number of least significant bits
+ */
+static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)
+{
+	return (bits >> lsbs) & 0x15;
+}
+
+#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> lsbs) & mask)
+
+/**
+ * Mark a temporary register as used.
+ */
+static void use_temporary(struct r300_fragment_program_code *code, unsigned int index)
+{
+	if (index > code->pixsize)
+		code->pixsize = index;
+}
+
+static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src)
+{
+	if (!src.Used)
+		return 0;
+
+	if (src.File == RC_FILE_CONSTANT) {
+		return src.Index | (1 << 5);
+	} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
+		use_temporary(code, src.Index);
+		return src.Index & 0x1f;
+	}
+
+	return 0;
+}
+
+
+static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
+{
+	switch(opcode) {
+	case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP;
+	case RC_OPCODE_CND: return R300_ALU_OUTC_CND;
+	case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3;
+	case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4;
+	case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC;
+	default:
+		error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
+		/* fall through */
+	case RC_OPCODE_NOP:
+		/* fall through */
+	case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD;
+	case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX;
+	case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN;
+	case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
+	}
+}
+
+static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
+{
+	switch(opcode) {
+	case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
+	case RC_OPCODE_CND: return R300_ALU_OUTA_CND;
+	case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
+	case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
+	case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
+	case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
+	case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
+	default:
+		error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
+		/* fall through */
+	case RC_OPCODE_NOP:
+		/* fall through */
+	case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
+	case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
+	case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
+	case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
+	case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
+	}
+}
+
+/**
+ * Emit one paired ALU instruction.
+ */
+static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
+{
+	int ip;
+	int j;
+	PROG_CODE;
+
+	if (code->alu.length >= c->Base.max_alu_insts) {
+		error("Too many ALU instructions");
+		return 0;
+	}
+
+	ip = code->alu.length++;
+
+	code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
+	code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
+
+	for(j = 0; j < 3; ++j) {
+		/* Set the RGB address */
+		unsigned int src = use_source(code, inst->RGB.Src[j]);
+		unsigned int arg;
+		if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
+			code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j);
+
+		code->alu.inst[ip].rgb_addr |= src << (6*j);
+
+		/* Set the Alpha address */
+		src = use_source(code, inst->Alpha.Src[j]);
+		if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
+			code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j);
+
+		code->alu.inst[ip].alpha_addr |= src << (6*j);
+
+		arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
+		arg |= inst->RGB.Arg[j].Abs << 6;
+		arg |= inst->RGB.Arg[j].Negate << 5;
+		code->alu.inst[ip].rgb_inst |= arg << (7*j);
+
+		arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
+		arg |= inst->Alpha.Arg[j].Abs << 6;
+		arg |= inst->Alpha.Arg[j].Negate << 5;
+		code->alu.inst[ip].alpha_inst |= arg << (7*j);
+	}
+
+	/* Presubtract */
+	if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+		switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
+		case RC_PRESUB_BIAS:
+			code->alu.inst[ip].rgb_inst |=
+						R300_ALU_SRCP_1_MINUS_2_SRC0;
+			break;
+		case RC_PRESUB_ADD:
+			code->alu.inst[ip].rgb_inst |=
+						R300_ALU_SRCP_SRC1_PLUS_SRC0;
+			break;
+		case RC_PRESUB_SUB:
+			code->alu.inst[ip].rgb_inst |=
+						R300_ALU_SRCP_SRC1_MINUS_SRC0;
+			break;
+		case RC_PRESUB_INV:
+			code->alu.inst[ip].rgb_inst |=
+						R300_ALU_SRCP_1_MINUS_SRC0;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
+		switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
+		case RC_PRESUB_BIAS:
+			code->alu.inst[ip].alpha_inst |=
+						R300_ALU_SRCP_1_MINUS_2_SRC0;
+			break;
+		case RC_PRESUB_ADD:
+			code->alu.inst[ip].alpha_inst |=
+						R300_ALU_SRCP_SRC1_PLUS_SRC0;
+			break;
+		case RC_PRESUB_SUB:
+			code->alu.inst[ip].alpha_inst |=
+						R300_ALU_SRCP_SRC1_MINUS_SRC0;
+			break;
+		case RC_PRESUB_INV:
+			code->alu.inst[ip].alpha_inst |=
+						R300_ALU_SRCP_1_MINUS_SRC0;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (inst->RGB.Saturate)
+		code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
+	if (inst->Alpha.Saturate)
+		code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
+
+	if (inst->RGB.WriteMask) {
+		use_temporary(code, inst->RGB.DestIndex);
+		if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS)
+			code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;
+		code->alu.inst[ip].rgb_addr |=
+			((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |
+			(inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
+	}
+	if (inst->RGB.OutputWriteMask) {
+		code->alu.inst[ip].rgb_addr |=
+            (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |
+            R300_RGB_TARGET(inst->RGB.Target);
+		emit->node_flags |= R300_RGBA_OUT;
+	}
+
+	if (inst->Alpha.WriteMask) {
+		use_temporary(code, inst->Alpha.DestIndex);
+		if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS)
+			code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;
+		code->alu.inst[ip].alpha_addr |=
+			((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) |
+			R300_ALU_DSTA_REG;
+	}
+	if (inst->Alpha.OutputWriteMask) {
+		code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT |
+            R300_ALPHA_TARGET(inst->Alpha.Target);
+		emit->node_flags |= R300_RGBA_OUT;
+	}
+	if (inst->Alpha.DepthWriteMask) {
+		code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
+		emit->node_flags |= R300_W_OUT;
+		c->code->writes_depth = 1;
+	}
+	if (inst->Nop)
+		code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;
+
+	return 1;
+}
+
+
+/**
+ * Finish the current node without advancing to the next one.
+ */
+static int finish_node(struct r300_emit_state * emit)
+{
+	struct r300_fragment_program_compiler * c = emit->compiler;
+	struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
+	unsigned alu_offset;
+	unsigned alu_end;
+	unsigned tex_offset;
+	unsigned tex_end;
+
+	unsigned int alu_offset_msbs, alu_end_msbs;
+
+	if (code->alu.length == emit->node_first_alu) {
+		/* Generate a single NOP for this node */
+		struct rc_pair_instruction inst;
+		memset(&inst, 0, sizeof(inst));
+		if (!emit_alu(emit, &inst))
+			return 0;
+	}
+
+	alu_offset = emit->node_first_alu;
+	alu_end = code->alu.length - alu_offset - 1;
+	tex_offset = emit->node_first_tex;
+	tex_end = code->tex.length - tex_offset - 1;
+
+	if (code->tex.length == emit->node_first_tex) {
+		if (emit->current_node > 0) {
+			error("Node %i has no TEX instructions", emit->current_node);
+			return 0;
+		}
+
+		tex_end = 0;
+	} else {
+		if (emit->current_node == 0)
+			code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
+	}
+
+	/* Write the config register.
+	 * Note: The order in which the words for each node are written
+	 * is not correct here and needs to be fixed up once we're entirely
+	 * done
+	 *
+	 * Also note that the register specification from AMD is slightly
+	 * incorrect in its description of this register. */
+	code->code_addr[emit->current_node]  =
+			((alu_offset << R300_ALU_START_SHIFT)
+				& R300_ALU_START_MASK)
+			| ((alu_end << R300_ALU_SIZE_SHIFT)
+				& R300_ALU_SIZE_MASK)
+			| ((tex_offset << R300_TEX_START_SHIFT)
+				& R300_TEX_START_MASK)
+			| ((tex_end << R300_TEX_SIZE_SHIFT)
+				& R300_TEX_SIZE_MASK)
+			| emit->node_flags
+			| (get_msbs_tex(tex_offset, 5)
+				<< R400_TEX_START_MSB_SHIFT)
+			| (get_msbs_tex(tex_end, 5)
+				<< R400_TEX_SIZE_MSB_SHIFT)
+			;
+
+	/* Write r400 extended instruction fields.  These will be ignored on
+	 * r300 cards.  */
+	alu_offset_msbs = get_msbs_alu(alu_offset);
+	alu_end_msbs = get_msbs_alu(alu_end);
+	switch(emit->current_node) {
+	case 0:
+		code->r400_code_offset_ext |=
+			alu_offset_msbs << R400_ALU_START3_MSB_SHIFT
+			| alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;
+		break;
+	case 1:
+		code->r400_code_offset_ext |=
+			alu_offset_msbs << R400_ALU_START2_MSB_SHIFT
+			| alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;
+		break;
+	case 2:
+		code->r400_code_offset_ext |=
+			alu_offset_msbs << R400_ALU_START1_MSB_SHIFT
+			| alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;
+		break;
+	case 3:
+		code->r400_code_offset_ext |=
+			alu_offset_msbs << R400_ALU_START0_MSB_SHIFT
+			| alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;
+		break;
+	}
+	return 1;
+}
+
+
+/**
+ * Begin a block of texture instructions.
+ * Create the necessary indirection.
+ */
+static int begin_tex(struct r300_emit_state * emit)
+{
+	PROG_CODE;
+
+	if (code->alu.length == emit->node_first_alu &&
+	    code->tex.length == emit->node_first_tex) {
+		return 1;
+	}
+
+	if (emit->current_node == 3) {
+		error("Too many texture indirections");
+		return 0;
+	}
+
+	if (!finish_node(emit))
+		return 0;
+
+	emit->current_node++;
+	emit->node_first_tex = code->tex.length;
+	emit->node_first_alu = code->alu.length;
+	emit->node_flags = 0;
+	return 1;
+}
+
+
+static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
+{
+	unsigned int unit;
+	unsigned int dest;
+	unsigned int opcode;
+	PROG_CODE;
+
+	if (code->tex.length >= emit->compiler->Base.max_tex_insts) {
+		error("Too many TEX instructions");
+		return 0;
+	}
+
+	unit = inst->U.I.TexSrcUnit;
+	dest = inst->U.I.DstReg.Index;
+
+	switch(inst->U.I.Opcode) {
+	case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
+	case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
+	case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
+	case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
+	default:
+		error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name);
+		return 0;
+	}
+
+	if (inst->U.I.Opcode == RC_OPCODE_KIL) {
+		unit = 0;
+		dest = 0;
+	} else {
+		use_temporary(code, dest);
+	}
+
+	use_temporary(code, inst->U.I.SrcReg[0].Index);
+
+	code->tex.inst[code->tex.length++] =
+		((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT)
+			& R300_SRC_ADDR_MASK)
+		| ((dest << R300_DST_ADDR_SHIFT)
+			& R300_DST_ADDR_MASK)
+		| (unit << R300_TEX_ID_SHIFT)
+		| (opcode << R300_TEX_INST_SHIFT)
+		| (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ?
+			R400_SRC_ADDR_EXT_BIT : 0)
+		| (dest >= R300_PFS_NUM_TEMP_REGS ?
+			R400_DST_ADDR_EXT_BIT : 0)
+		;
+	return 1;
+}
+
+
+/**
+ * Final compilation step: Turn the intermediate radeon_program into
+ * machine-readable instructions.
+ */
+void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
+{
+	struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
+	struct r300_emit_state emit;
+	struct r300_fragment_program_code *code = &compiler->code->code.r300;
+	unsigned int tex_end;
+
+	memset(&emit, 0, sizeof(emit));
+	emit.compiler = compiler;
+
+	memset(code, 0, sizeof(struct r300_fragment_program_code));
+
+	for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
+	    inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
+	    inst = inst->Next) {
+		if (inst->Type == RC_INSTRUCTION_NORMAL) {
+			if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
+				begin_tex(&emit);
+				continue;
+			}
+
+			emit_tex(&emit, inst);
+		} else {
+			emit_alu(&emit, &inst->U.P);
+		}
+	}
+
+	if (code->pixsize >= compiler->Base.max_temp_regs)
+		rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
+
+	if (compiler->Base.Error)
+		return;
+
+	/* Finish the program */
+	finish_node(&emit);
+
+	code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
+
+	/* Set r400 extended instruction fields.  These values will be ignored
+	 * on r300 cards. */
+	code->r400_code_offset_ext |=
+		(get_msbs_alu(0)
+				<< R400_ALU_OFFSET_MSB_SHIFT)
+		| (get_msbs_alu(code->alu.length - 1)
+				<< R400_ALU_SIZE_MSB_SHIFT);
+
+	tex_end = code->tex.length ? code->tex.length - 1 : 0;
+	code->code_offset =
+		((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
+			& R300_PFS_CNTL_ALU_OFFSET_MASK)
+		| (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT)
+			& R300_PFS_CNTL_ALU_END_MASK)
+		| ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
+			& R300_PFS_CNTL_TEX_OFFSET_MASK)
+		| ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT)
+			& R300_PFS_CNTL_TEX_END_MASK)
+		| (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT)
+		| (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT)
+		;
+
+	if (emit.current_node < 3) {
+		int shift = 3 - emit.current_node;
+		int i;
+		for(i = emit.current_node; i >= 0; --i)
+			code->code_addr[shift + i] = code->code_addr[i];
+		for(i = 0; i < shift; ++i)
+			code->code_addr[i] = 0;
+	}
+
+	if (code->pixsize >= R300_PFS_NUM_TEMP_REGS
+	    || code->alu.length > R300_PFS_MAX_ALU_INST
+	    || code->tex.length > R300_PFS_MAX_TEX_INST) {
+
+		code->r390_mode = 1;
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c
new file mode 100644
index 00000000000..b7bca8c0cfa
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ * Utilities to deal with the somewhat odd restriction on R300 fragment
+ * program swizzles.
+ */
+
+#include "r300_fragprog_swizzle.h"
+
+#include <stdio.h>
+
+#include "../r300_reg.h"
+#include "radeon_compiler.h"
+
+#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO))
+
+struct swizzle_data {
+	unsigned int hash; /**< swizzle value this matches */
+	unsigned int base; /**< base value for hw swizzle */
+	unsigned int stride; /**< difference in base between arg0/1/2 */
+	unsigned int srcp_stride; /**< difference in base between arg0/scrp */
+};
+
+static const struct swizzle_data native_swizzles[] = {
+	{MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, 15},
+	{MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, 15},
+	{MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, 15},
+	{MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, 15},
+	{MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, 7},
+	{MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, 0},
+	{MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, 0},
+	{MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, 0},
+	{MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0},
+	{MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0},
+	{MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0}
+};
+
+static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]);
+
+/**
+ * Find a native RGB swizzle that matches the given swizzle.
+ * Returns 0 if none found.
+ */
+static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle)
+{
+	int i, comp;
+
+	for(i = 0; i < num_native_swizzles; ++i) {
+		const struct swizzle_data* sd = &native_swizzles[i];
+		for(comp = 0; comp < 3; ++comp) {
+			unsigned int swz = GET_SWZ(swizzle, comp);
+			if (swz == RC_SWIZZLE_UNUSED)
+				continue;
+			if (swz != GET_SWZ(sd->hash, comp))
+				break;
+		}
+		if (comp == 3)
+			return sd;
+	}
+
+	return 0;
+}
+
+/**
+ * Determines if the given swizzle is valid for r300/r400.  In most situations
+ * it is better to use r300_swizzle_is_native() which can be accesed via
+ * struct radeon_compiler *c; c->SwizzleCaps->IsNative().
+ */
+int r300_swizzle_is_native_basic(unsigned int swizzle)
+{
+	if(lookup_native_swizzle(swizzle))
+		return 1;
+	else
+		return 0;
+}
+
+/**
+ * Check whether the given instruction supports the swizzle and negate
+ * combinations in the given source register.
+ */
+static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
+{
+	const struct swizzle_data* sd;
+	unsigned int relevant;
+	int j;
+
+	if (opcode == RC_OPCODE_KIL ||
+	    opcode == RC_OPCODE_TEX ||
+	    opcode == RC_OPCODE_TXB ||
+	    opcode == RC_OPCODE_TXP) {
+		if (reg.Abs || reg.Negate)
+			return 0;
+
+		for(j = 0; j < 4; ++j) {
+			unsigned int swz = GET_SWZ(reg.Swizzle, j);
+			if (swz == RC_SWIZZLE_UNUSED)
+				continue;
+			if (swz != j)
+				return 0;
+		}
+
+		return 1;
+	}
+
+	relevant = 0;
+
+	for(j = 0; j < 3; ++j)
+		if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED)
+			relevant |= 1 << j;
+
+	if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
+		return 0;
+
+	sd = lookup_native_swizzle(reg.Swizzle);
+	if (!sd || (reg.File == RC_FILE_PRESUB && sd->srcp_stride == 0))
+		return 0;
+
+	return 1;
+}
+
+
+static void r300_swizzle_split(
+		struct rc_src_register src, unsigned int mask,
+		struct rc_swizzle_split * split)
+{
+	split->NumPhases = 0;
+
+	while(mask) {
+		unsigned int best_matchcount = 0;
+		unsigned int best_matchmask = 0;
+		int i, comp;
+
+		for(i = 0; i < num_native_swizzles; ++i) {
+			const struct swizzle_data *sd = &native_swizzles[i];
+			unsigned int matchcount = 0;
+			unsigned int matchmask = 0;
+			for(comp = 0; comp < 3; ++comp) {
+				unsigned int swz;
+				if (!GET_BIT(mask, comp))
+					continue;
+				swz = GET_SWZ(src.Swizzle, comp);
+				if (swz == RC_SWIZZLE_UNUSED)
+					continue;
+				if (swz == GET_SWZ(sd->hash, comp)) {
+					/* check if the negate bit of current component
+					 * is the same for already matched components */
+					if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp))))
+						continue;
+
+					matchcount++;
+					matchmask |= 1 << comp;
+				}
+			}
+			if (matchcount > best_matchcount) {
+				best_matchcount = matchcount;
+				best_matchmask = matchmask;
+				if (matchmask == (mask & RC_MASK_XYZ))
+					break;
+			}
+		}
+
+		if (mask & RC_MASK_W)
+			best_matchmask |= RC_MASK_W;
+
+		split->Phase[split->NumPhases++] = best_matchmask;
+		mask &= ~best_matchmask;
+	}
+}
+
+struct rc_swizzle_caps r300_swizzle_caps = {
+	.IsNative = r300_swizzle_is_native,
+	.Split = r300_swizzle_split
+};
+
+
+/**
+ * Translate an RGB (XYZ) swizzle into the hardware code for the given
+ * instruction source.
+ */
+unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
+{
+	const struct swizzle_data* sd = lookup_native_swizzle(swizzle);
+
+	if (!sd || (src == RC_PAIR_PRESUB_SRC && sd->srcp_stride == 0)) {
+		fprintf(stderr, "Not a native swizzle: %08x\n", swizzle);
+		return 0;
+	}
+
+	if (src == RC_PAIR_PRESUB_SRC) {
+		return sd->base + sd->srcp_stride;
+	} else {
+		return sd->base + src*sd->stride;
+	}
+}
+
+
+/**
+ * Translate an Alpha (W) swizzle into the hardware code for the given
+ * instruction source.
+ */
+unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle)
+{
+	unsigned int swz = GET_SWZ(swizzle, 0);
+	if (src == RC_PAIR_PRESUB_SRC) {
+		return R300_ALU_ARGA_SRCP_X + swz;
+	}
+	if (swz < 3)
+		return swz + 3*src;
+
+	switch(swz) {
+	case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src;
+	case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE;
+	case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO;
+	case RC_SWIZZLE_HALF: return R300_ALU_ARGA_HALF;
+	default: return R300_ALU_ARGA_ONE;
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h
new file mode 100644
index 00000000000..f2635be140d
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r300_fragprog_swizzle.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __R300_FRAGPROG_SWIZZLE_H_
+#define __R300_FRAGPROG_SWIZZLE_H_
+
+#include "radeon_swizzle.h"
+
+extern struct rc_swizzle_caps r300_swizzle_caps;
+
+unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle);
+unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle);
+int r300_swizzle_is_native_basic(unsigned int swizzle);
+
+#endif /* __R300_FRAGPROG_SWIZZLE_H_ */
diff --git a/src/gallium/drivers/r300/compiler/r3xx_fragprog.c b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c
new file mode 100644
index 00000000000..bb6c010e8e3
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+#include "radeon_emulate_branches.h"
+#include "radeon_emulate_loops.h"
+#include "radeon_program_alu.h"
+#include "radeon_program_tex.h"
+#include "radeon_rename_regs.h"
+#include "radeon_remove_constants.h"
+#include "r300_fragprog.h"
+#include "r300_fragprog_swizzle.h"
+#include "r500_fragprog.h"
+
+
+static void dataflow_outputs_mark_use(void * userdata, void * data,
+		void (*callback)(void *, unsigned int, unsigned int))
+{
+	struct r300_fragment_program_compiler * c = userdata;
+	callback(data, c->OutputColor[0], RC_MASK_XYZW);
+	callback(data, c->OutputColor[1], RC_MASK_XYZW);
+	callback(data, c->OutputColor[2], RC_MASK_XYZW);
+	callback(data, c->OutputColor[3], RC_MASK_XYZW);
+	callback(data, c->OutputDepth, RC_MASK_W);
+}
+
+static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user)
+{
+	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
+	struct rc_instruction *rci;
+
+	for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) {
+		struct rc_sub_instruction * inst = &rci->U.I;
+		unsigned i;
+		const struct rc_opcode_info *info = rc_get_opcode_info(inst->Opcode);
+
+		if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth)
+			continue;
+
+		if (inst->DstReg.WriteMask & RC_MASK_Z) {
+			inst->DstReg.WriteMask = RC_MASK_W;
+		} else {
+			inst->DstReg.WriteMask = 0;
+			continue;
+		}
+
+		if (!info->IsComponentwise) {
+			continue;
+		}
+
+		for (i = 0; i < info->NumSrcRegs; i++) {
+			inst->SrcReg[i] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[i]);
+		}
+	}
+}
+
+static int radeon_saturate_output(
+		struct radeon_compiler * c,
+		struct rc_instruction * inst,
+		void* data)
+{
+	const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
+
+	if (!info->HasDstReg || inst->U.I.DstReg.File != RC_FILE_OUTPUT)
+		return 0;
+
+	inst->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
+	return 1;
+}
+
+void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
+{
+	int is_r500 = c->Base.is_r500;
+	int opt = !c->Base.disable_optimizations;
+	int sat_out = c->state.frag_clamp;
+
+	/* Lists of instruction transformations. */
+	struct radeon_program_transformation saturate_output[] = {
+		{ &radeon_saturate_output, c },
+		{ 0, 0 }
+	};
+
+	struct radeon_program_transformation rewrite_tex[] = {
+		{ &radeonTransformTEX, c },
+		{ 0, 0 }
+	};
+
+	struct radeon_program_transformation rewrite_if[] = {
+		{ &r500_transform_IF, 0 },
+		{0, 0}
+	};
+
+	struct radeon_program_transformation native_rewrite_r500[] = {
+		{ &radeonTransformALU, 0 },
+		{ &radeonTransformDeriv, 0 },
+		{ &radeonTransformTrigScale, 0 },
+		{ 0, 0 }
+	};
+
+	struct radeon_program_transformation native_rewrite_r300[] = {
+		{ &radeonTransformALU, 0 },
+		{ &r300_transform_trig_simple, 0 },
+		{ 0, 0 }
+	};
+
+	/* List of compiler passes. */
+	struct radeon_compiler_pass fs_list[] = {
+		/* NAME				DUMP PREDICATE	FUNCTION			PARAM */
+		{"rewrite depth out",		1, 1,		rc_rewrite_depth_out,		NULL},
+		/* This transformation needs to be done before any of the IF
+		 * instructions are modified. */
+		{"transform KILP",		1, 1,		rc_transform_KILP,		NULL},
+		{"unroll loops",		1, is_r500,	rc_unroll_loops,		NULL},
+		{"transform loops",		1, !is_r500,	rc_transform_loops,		NULL},
+		{"emulate branches",		1, !is_r500,	rc_emulate_branches,		NULL},
+		{"saturate output writes",	1, sat_out,	rc_local_transform,		saturate_output},
+		{"transform TEX",		1, 1,		rc_local_transform,		rewrite_tex},
+		{"transform IF",		1, is_r500,	rc_local_transform,		rewrite_if},
+		{"native rewrite",		1, is_r500,	rc_local_transform,		native_rewrite_r500},
+		{"native rewrite",		1, !is_r500,	rc_local_transform,		native_rewrite_r300},
+		{"deadcode",			1, opt,		rc_dataflow_deadcode,		dataflow_outputs_mark_use},
+		{"emulate loops",		1, !is_r500,	rc_emulate_loops,		NULL},
+		{"dataflow optimize",		1, opt,		rc_optimize,			NULL},
+		{"dataflow swizzles",		1, 1,		rc_dataflow_swizzles,		NULL},
+		{"dead constants",		1, 1,		rc_remove_unused_constants,	&c->code->constants_remap_table},
+		/* This pass makes it easier for the scheduler to group TEX
+		 * instructions and reduces the chances of creating too
+		 * many texture indirections.*/
+		{"register rename",		1, !is_r500,	rc_rename_regs,			NULL},
+		{"pair translate",		1, 1,		rc_pair_translate,		NULL},
+		{"pair scheduling",		1, 1,		rc_pair_schedule,		NULL},
+		{"dead sources",		1, 1,		rc_pair_remove_dead_sources, NULL},
+		{"register allocation",		1, 1,		rc_pair_regalloc,		&opt},
+		{"final code validation",	0, 1,		rc_validate_final_shader,	NULL},
+		{"machine code generation",	0, is_r500,	r500BuildFragmentProgramHwCode,	NULL},
+		{"machine code generation",	0, !is_r500,	r300BuildFragmentProgramHwCode,	NULL},
+		{"dump machine code",		0, is_r500  && (c->Base.Debug & RC_DBG_LOG), r500FragmentProgramDump, NULL},
+		{"dump machine code",		0, !is_r500 && (c->Base.Debug & RC_DBG_LOG), r300FragmentProgramDump, NULL},
+		{NULL, 0, 0, NULL, NULL}
+	};
+
+	c->Base.type = RC_FRAGMENT_PROGRAM;
+	c->Base.SwizzleCaps = c->Base.is_r500 ? &r500_swizzle_caps : &r300_swizzle_caps;
+
+	rc_run_compiler(&c->Base, fs_list);
+
+	rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
+}
diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
new file mode 100644
index 00000000000..654f9a070d5
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
@@ -0,0 +1,1045 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include <stdio.h>
+
+#include "../r300_reg.h"
+
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+#include "radeon_program_alu.h"
+#include "radeon_swizzle.h"
+#include "radeon_emulate_branches.h"
+#include "radeon_emulate_loops.h"
+#include "radeon_remove_constants.h"
+
+struct loop {
+	int BgnLoop;
+
+};
+
+/*
+ * Take an already-setup and valid source then swizzle it appropriately to
+ * obtain a constant ZERO or ONE source.
+ */
+#define __CONST(x, y)	\
+	(PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]),	\
+			   t_swizzle(y),	\
+			   t_swizzle(y),	\
+			   t_swizzle(y),	\
+			   t_swizzle(y),	\
+			   t_src_class(vpi->SrcReg[x].File), \
+			   RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4))
+
+
+static unsigned long t_dst_mask(unsigned int mask)
+{
+	/* RC_MASK_* is equivalent to VSF_FLAG_* */
+	return mask & RC_MASK_XYZW;
+}
+
+static unsigned long t_dst_class(rc_register_file file)
+{
+	switch (file) {
+	default:
+		fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
+		/* fall-through */
+	case RC_FILE_TEMPORARY:
+		return PVS_DST_REG_TEMPORARY;
+	case RC_FILE_OUTPUT:
+		return PVS_DST_REG_OUT;
+	case RC_FILE_ADDRESS:
+		return PVS_DST_REG_A0;
+	}
+}
+
+static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
+				 struct rc_dst_register *dst)
+{
+	if (dst->File == RC_FILE_OUTPUT)
+		return vp->outputs[dst->Index];
+
+	return dst->Index;
+}
+
+static unsigned long t_src_class(rc_register_file file)
+{
+	switch (file) {
+	default:
+		fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
+		/* fall-through */
+	case RC_FILE_NONE:
+	case RC_FILE_TEMPORARY:
+		return PVS_SRC_REG_TEMPORARY;
+	case RC_FILE_INPUT:
+		return PVS_SRC_REG_INPUT;
+	case RC_FILE_CONSTANT:
+		return PVS_SRC_REG_CONSTANT;
+	}
+}
+
+static int t_src_conflict(struct rc_src_register a, struct rc_src_register b)
+{
+	unsigned long aclass = t_src_class(a.File);
+	unsigned long bclass = t_src_class(b.File);
+
+	if (aclass != bclass)
+		return 0;
+	if (aclass == PVS_SRC_REG_TEMPORARY)
+		return 0;
+
+	if (a.RelAddr || b.RelAddr)
+		return 1;
+	if (a.Index != b.Index)
+		return 1;
+
+	return 0;
+}
+
+static inline unsigned long t_swizzle(unsigned int swizzle)
+{
+	/* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
+	return swizzle;
+}
+
+static unsigned long t_src_index(struct r300_vertex_program_code *vp,
+				 struct rc_src_register *src)
+{
+	if (src->File == RC_FILE_INPUT) {
+		assert(vp->inputs[src->Index] != -1);
+		return vp->inputs[src->Index];
+	} else {
+		if (src->Index < 0) {
+			fprintf(stderr,
+				"negative offsets for indirect addressing do not work.\n");
+			return 0;
+		}
+		return src->Index;
+	}
+}
+
+/* these two functions should probably be merged... */
+
+static unsigned long t_src(struct r300_vertex_program_code *vp,
+			   struct rc_src_register *src)
+{
+	/* src->Negate uses the RC_MASK_ flags from program_instruction.h,
+	 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
+	 */
+	return PVS_SRC_OPERAND(t_src_index(vp, src),
+			       t_swizzle(GET_SWZ(src->Swizzle, 0)),
+			       t_swizzle(GET_SWZ(src->Swizzle, 1)),
+			       t_swizzle(GET_SWZ(src->Swizzle, 2)),
+			       t_swizzle(GET_SWZ(src->Swizzle, 3)),
+			       t_src_class(src->File),
+			       src->Negate) |
+	       (src->RelAddr << 4) | (src->Abs << 3);
+}
+
+static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
+				  struct rc_src_register *src)
+{
+	/* src->Negate uses the RC_MASK_ flags from program_instruction.h,
+	 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
+	 */
+	return PVS_SRC_OPERAND(t_src_index(vp, src),
+			       t_swizzle(GET_SWZ(src->Swizzle, 0)),
+			       t_swizzle(GET_SWZ(src->Swizzle, 0)),
+			       t_swizzle(GET_SWZ(src->Swizzle, 0)),
+			       t_swizzle(GET_SWZ(src->Swizzle, 0)),
+			       t_src_class(src->File),
+			       src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+	       (src->RelAddr << 4) | (src->Abs << 3);
+}
+
+static int valid_dst(struct r300_vertex_program_code *vp,
+			   struct rc_dst_register *dst)
+{
+	if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) {
+		return 0;
+	} else if (dst->File == RC_FILE_ADDRESS) {
+		assert(dst->Index == 0);
+	}
+
+	return 1;
+}
+
+static void ei_vector1(struct r300_vertex_program_code *vp,
+				unsigned int hw_opcode,
+				struct rc_sub_instruction *vpi,
+				unsigned int * inst)
+{
+	inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
+				     0,
+				     0,
+				     t_dst_index(vp, &vpi->DstReg),
+				     t_dst_mask(vpi->DstReg.WriteMask),
+				     t_dst_class(vpi->DstReg.File));
+	inst[1] = t_src(vp, &vpi->SrcReg[0]);
+	inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+	inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
+}
+
+static void ei_vector2(struct r300_vertex_program_code *vp,
+				unsigned int hw_opcode,
+				struct rc_sub_instruction *vpi,
+				unsigned int * inst)
+{
+	inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
+				     0,
+				     0,
+				     t_dst_index(vp, &vpi->DstReg),
+				     t_dst_mask(vpi->DstReg.WriteMask),
+				     t_dst_class(vpi->DstReg.File));
+	inst[1] = t_src(vp, &vpi->SrcReg[0]);
+	inst[2] = t_src(vp, &vpi->SrcReg[1]);
+	inst[3] = __CONST(1, RC_SWIZZLE_ZERO);
+}
+
+static void ei_math1(struct r300_vertex_program_code *vp,
+				unsigned int hw_opcode,
+				struct rc_sub_instruction *vpi,
+				unsigned int * inst)
+{
+	inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
+				     1,
+				     0,
+				     t_dst_index(vp, &vpi->DstReg),
+				     t_dst_mask(vpi->DstReg.WriteMask),
+				     t_dst_class(vpi->DstReg.File));
+	inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
+	inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+	inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
+}
+
+static void ei_lit(struct r300_vertex_program_code *vp,
+				      struct rc_sub_instruction *vpi,
+				      unsigned int * inst)
+{
+	//LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
+
+	inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
+				     1,
+				     0,
+				     t_dst_index(vp, &vpi->DstReg),
+				     t_dst_mask(vpi->DstReg.WriteMask),
+				     t_dst_class(vpi->DstReg.File));
+	/* NOTE: Users swizzling might not work. */
+	inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)),	// X
+				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)),	// W
+				  PVS_SRC_SELECT_FORCE_0,	// Z
+				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)),	// Y
+				  t_src_class(vpi->SrcReg[0].File),
+				  vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+	    (vpi->SrcReg[0].RelAddr << 4);
+	inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)),	// Y
+				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)),	// W
+				  PVS_SRC_SELECT_FORCE_0,	// Z
+				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)),	// X
+				  t_src_class(vpi->SrcReg[0].File),
+				  vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+	    (vpi->SrcReg[0].RelAddr << 4);
+	inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)),	// Y
+				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)),	// X
+				  PVS_SRC_SELECT_FORCE_0,	// Z
+				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)),	// W
+				  t_src_class(vpi->SrcReg[0].File),
+				  vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+	    (vpi->SrcReg[0].RelAddr << 4);
+}
+
+static void ei_mad(struct r300_vertex_program_code *vp,
+				      struct rc_sub_instruction *vpi,
+				      unsigned int * inst)
+{
+	unsigned int i;
+	/* Remarks about hardware limitations of MAD
+	 * (please preserve this comment, as this information is _NOT_
+	 * in the documentation provided by AMD).
+	 *
+	 * As described in the documentation, MAD with three unique temporary
+	 * source registers requires the use of the macro version.
+	 *
+	 * However (and this is not mentioned in the documentation), apparently
+	 * the macro version is _NOT_ a full superset of the normal version.
+	 * In particular, the macro version does not always work when relative
+	 * addressing is used in the source operands.
+	 *
+	 * This limitation caused incorrect rendering in Sauerbraten's OpenGL
+	 * assembly shader path when using medium quality animations
+	 * (i.e. animations with matrix blending instead of quaternion blending).
+	 *
+	 * Unfortunately, I (nha) have been unable to extract a Piglit regression
+	 * test for this issue - for some reason, it is possible to have vertex
+	 * programs whose prefix is *exactly* the same as the prefix of the
+	 * offending program in Sauerbraten up to the offending instruction
+	 * without causing any trouble.
+	 *
+	 * Bottom line: Only use the macro version only when really necessary;
+	 * according to AMD docs, this should improve performance by one clock
+	 * as a nice side bonus.
+	 */
+	if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY &&
+	    vpi->SrcReg[1].File == RC_FILE_TEMPORARY &&
+	    vpi->SrcReg[2].File == RC_FILE_TEMPORARY &&
+	    vpi->SrcReg[0].Index != vpi->SrcReg[1].Index &&
+	    vpi->SrcReg[0].Index != vpi->SrcReg[2].Index &&
+	    vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) {
+		inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
+				0,
+				1,
+				t_dst_index(vp, &vpi->DstReg),
+				t_dst_mask(vpi->DstReg.WriteMask),
+				t_dst_class(vpi->DstReg.File));
+	} else {
+		inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
+				0,
+				0,
+				t_dst_index(vp, &vpi->DstReg),
+				t_dst_mask(vpi->DstReg.WriteMask),
+				t_dst_class(vpi->DstReg.File));
+
+		/* Arguments with constant swizzles still count as a unique
+		 * temporary, so we should make sure these arguments share a
+		 * register index with one of the other arguments. */
+		for (i = 0; i < 3; i++) {
+			unsigned int j;
+			if (vpi->SrcReg[i].File != RC_FILE_NONE)
+				continue;
+
+			for (j = 0; j < 3; j++) {
+				if (i != j) {
+					vpi->SrcReg[i].Index =
+						vpi->SrcReg[j].Index;
+					break;
+				}
+			}
+		}
+	}
+	inst[1] = t_src(vp, &vpi->SrcReg[0]);
+	inst[2] = t_src(vp, &vpi->SrcReg[1]);
+	inst[3] = t_src(vp, &vpi->SrcReg[2]);
+}
+
+static void ei_pow(struct r300_vertex_program_code *vp,
+				      struct rc_sub_instruction *vpi,
+				      unsigned int * inst)
+{
+	inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
+				     1,
+				     0,
+				     t_dst_index(vp, &vpi->DstReg),
+				     t_dst_mask(vpi->DstReg.WriteMask),
+				     t_dst_class(vpi->DstReg.File));
+	inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
+	inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+	inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
+}
+
+static void mark_write(void * userdata,	struct rc_instruction * inst,
+		rc_register_file file,	unsigned int index, unsigned int mask)
+{
+	unsigned int * writemasks = userdata;
+
+	if (file != RC_FILE_TEMPORARY)
+		return;
+
+	if (index >= R300_VS_MAX_TEMPS)
+		return;
+
+	writemasks[index] |= mask;
+}
+
+static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler)
+{
+	return PVS_SRC_OPERAND(compiler->PredicateIndex,
+		t_swizzle(RC_SWIZZLE_ZERO),
+		t_swizzle(RC_SWIZZLE_ZERO),
+		t_swizzle(RC_SWIZZLE_ZERO),
+		t_swizzle(RC_SWIZZLE_W),
+		t_src_class(RC_FILE_TEMPORARY),
+		0);
+}
+
+static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler,
+					unsigned int hw_opcode, int is_math)
+{
+	return PVS_OP_DST_OPERAND(hw_opcode,
+	     is_math,
+	     0,
+	     compiler->PredicateIndex,
+	     RC_MASK_W,
+	     t_dst_class(RC_FILE_TEMPORARY));
+
+}
+
+static void ei_if(struct r300_vertex_program_compiler * compiler,
+					struct rc_instruction *rci,
+					unsigned int * inst,
+					unsigned int branch_depth)
+{
+	unsigned int predicate_opcode;
+	int is_math = 0;
+
+	if (!compiler->Base.is_r500) {
+		rc_error(&compiler->Base,"Opcode IF not supported\n");
+		return;
+	}
+
+	/* Reserve a temporary to use as our predicate stack counter, if we
+	 * don't already have one. */
+	if (!compiler->PredicateMask) {
+		unsigned int writemasks[RC_REGISTER_MAX_INDEX];
+		struct rc_instruction * inst;
+		unsigned int i;
+		memset(writemasks, 0, sizeof(writemasks));
+		for(inst = compiler->Base.Program.Instructions.Next;
+				inst != &compiler->Base.Program.Instructions;
+							inst = inst->Next) {
+			rc_for_all_writes_mask(inst, mark_write, writemasks);
+		}
+		for(i = 0; i < compiler->Base.max_temp_regs; i++) {
+			unsigned int mask = ~writemasks[i] & RC_MASK_XYZW;
+			/* Only the W component can be used fo the predicate
+			 * stack counter. */
+			if (mask & RC_MASK_W) {
+				compiler->PredicateMask = RC_MASK_W;
+				compiler->PredicateIndex = i;
+				break;
+			}
+		}
+		if (i == compiler->Base.max_temp_regs) {
+			rc_error(&compiler->Base, "No free temporary to use for"
+					" predicate stack counter.\n");
+			return;
+		}
+	}
+	predicate_opcode =
+			branch_depth ? VE_PRED_SET_NEQ_PUSH : ME_PRED_SET_NEQ;
+
+	rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0));
+	if (branch_depth == 0) {
+		is_math = 1;
+		predicate_opcode = ME_PRED_SET_NEQ;
+		inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
+		inst[2] = 0;
+	} else {
+		predicate_opcode = VE_PRED_SET_NEQ_PUSH;
+		inst[1] = t_pred_src(compiler);
+		inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
+	}
+
+	inst[0] = t_pred_dst(compiler, predicate_opcode, is_math);
+	inst[3] = 0;
+
+}
+
+static void ei_else(struct r300_vertex_program_compiler * compiler,
+							unsigned int * inst)
+{
+	if (!compiler->Base.is_r500) {
+		rc_error(&compiler->Base,"Opcode ELSE not supported\n");
+		return;
+	}
+	inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1);
+	inst[1] = t_pred_src(compiler);
+	inst[2] = 0;
+	inst[3] = 0;
+}
+
+static void ei_endif(struct r300_vertex_program_compiler *compiler,
+							unsigned int * inst)
+{
+	if (!compiler->Base.is_r500) {
+		rc_error(&compiler->Base,"Opcode ENDIF not supported\n");
+		return;
+	}
+	inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1);
+	inst[1] = t_pred_src(compiler);
+	inst[2] = 0;
+	inst[3] = 0;
+}
+
+static void translate_vertex_program(struct radeon_compiler *c, void *user)
+{
+	struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c;
+	struct rc_instruction *rci;
+
+	struct loop * loops = NULL;
+	int current_loop_depth = 0;
+	int loops_reserved = 0;
+
+	unsigned int branch_depth = 0;
+
+	compiler->code->pos_end = 0;	/* Not supported yet */
+	compiler->code->length = 0;
+	compiler->code->num_temporaries = 0;
+
+	compiler->SetHwInputOutput(compiler);
+
+	for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
+		struct rc_sub_instruction *vpi = &rci->U.I;
+		unsigned int *inst = compiler->code->body.d + compiler->code->length;
+		const struct rc_opcode_info *info = rc_get_opcode_info(vpi->Opcode);
+
+		/* Skip instructions writing to non-existing destination */
+		if (!valid_dst(compiler->code, &vpi->DstReg))
+			continue;
+
+		if (info->HasDstReg) {
+			/* Neither is Saturate. */
+			if (vpi->SaturateMode != RC_SATURATE_NONE) {
+				rc_error(&compiler->Base, "Vertex program does not support the Saturate "
+					 "modifier (yet).\n");
+			}
+		}
+
+		if (compiler->code->length >= c->max_alu_insts * 4) {
+			rc_error(&compiler->Base, "Vertex program has too many instructions\n");
+			return;
+		}
+
+		assert(compiler->Base.is_r500 ||
+		       (vpi->Opcode != RC_OPCODE_SEQ &&
+			vpi->Opcode != RC_OPCODE_SNE));
+
+		switch (vpi->Opcode) {
+		case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
+		case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
+		case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
+		case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
+		case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
+		case RC_OPCODE_ELSE: ei_else(compiler, inst); break;
+		case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break;
+		case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
+		case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
+		case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
+		case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break;
+		case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
+		case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
+		case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
+		case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
+		case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
+		case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
+		case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
+		case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
+		case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
+		case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
+		case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
+		case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break;
+		case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
+		case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break;
+		case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
+		case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
+		case RC_OPCODE_BGNLOOP:
+		{
+			struct loop * l;
+
+			if ((!compiler->Base.is_r500
+				&& loops_reserved >= R300_VS_MAX_LOOP_DEPTH)
+				|| loops_reserved >= R500_VS_MAX_FC_DEPTH) {
+				rc_error(&compiler->Base,
+						"Loops are nested too deep.");
+				return;
+			}
+			memory_pool_array_reserve(&compiler->Base.Pool,
+					struct loop, loops, current_loop_depth,
+					loops_reserved, 1);
+			l = &loops[current_loop_depth++];
+			memset(l , 0, sizeof(struct loop));
+			l->BgnLoop = (compiler->code->length / 4);
+			continue;
+		}
+		case RC_OPCODE_ENDLOOP:
+		{
+			struct loop * l;
+			unsigned int act_addr;
+			unsigned int last_addr;
+			unsigned int ret_addr;
+
+			assert(loops);
+			l = &loops[current_loop_depth - 1];
+			act_addr = l->BgnLoop - 1;
+			last_addr = (compiler->code->length / 4) - 1;
+			ret_addr = l->BgnLoop;
+
+			if (loops_reserved >= R300_VS_MAX_FC_OPS) {
+				rc_error(&compiler->Base,
+					"Too many flow control instructions.");
+				return;
+			}
+			if (compiler->Base.is_r500) {
+				compiler->code->fc_op_addrs.r500
+					[compiler->code->num_fc_ops].lw =
+					R500_PVS_FC_ACT_ADRS(act_addr)
+					| R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff)
+					;
+				compiler->code->fc_op_addrs.r500
+					[compiler->code->num_fc_ops].uw =
+					R500_PVS_FC_LAST_INST(last_addr)
+					| R500_PVS_FC_RTN_INST(ret_addr)
+					;
+			} else {
+				compiler->code->fc_op_addrs.r300
+					[compiler->code->num_fc_ops] =
+					R300_PVS_FC_ACT_ADRS(act_addr)
+					| R300_PVS_FC_LOOP_CNT_JMP_INST(0xff)
+					| R300_PVS_FC_LAST_INST(last_addr)
+					| R300_PVS_FC_RTN_INST(ret_addr)
+					;
+			}
+			compiler->code->fc_loop_index[compiler->code->num_fc_ops] =
+				R300_PVS_FC_LOOP_INIT_VAL(0x0)
+				| R300_PVS_FC_LOOP_STEP_VAL(0x1)
+				;
+			compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP(
+						compiler->code->num_fc_ops);
+			compiler->code->num_fc_ops++;
+			current_loop_depth--;
+			continue;
+		}
+
+		default:
+			rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name);
+			return;
+		}
+
+		/* Non-flow control instructions that are inside an if statement
+		 * need to pay attention to the predicate bit. */
+		if (branch_depth
+			&& vpi->Opcode != RC_OPCODE_IF
+			&& vpi->Opcode != RC_OPCODE_ELSE
+			&& vpi->Opcode != RC_OPCODE_ENDIF) {
+
+			inst[0] |= (PVS_DST_PRED_ENABLE_MASK
+						<< PVS_DST_PRED_ENABLE_SHIFT);
+			inst[0] |= (PVS_DST_PRED_SENSE_MASK
+						<< PVS_DST_PRED_SENSE_SHIFT);
+		}
+
+		/* Update the number of temporaries. */
+		if (info->HasDstReg && vpi->DstReg.File == RC_FILE_TEMPORARY &&
+		    vpi->DstReg.Index >= compiler->code->num_temporaries)
+			compiler->code->num_temporaries = vpi->DstReg.Index + 1;
+
+		for (unsigned i = 0; i < info->NumSrcRegs; i++)
+			if (vpi->SrcReg[i].File == RC_FILE_TEMPORARY &&
+			    vpi->SrcReg[i].Index >= compiler->code->num_temporaries)
+				compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1;
+
+		if (compiler->PredicateMask)
+			if (compiler->PredicateIndex >= compiler->code->num_temporaries)
+				compiler->code->num_temporaries = compiler->PredicateIndex + 1;
+
+		if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) {
+			rc_error(&compiler->Base, "Too many temporaries.\n");
+			return;
+		}
+
+		compiler->code->length += 4;
+
+		if (compiler->Base.Error)
+			return;
+	}
+}
+
+struct temporary_allocation {
+	unsigned int Allocated:1;
+	unsigned int HwTemp:15;
+	struct rc_instruction * LastRead;
+};
+
+static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
+{
+	struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c;
+	struct rc_instruction *inst;
+	struct rc_instruction *end_loop = NULL;
+	unsigned int num_orig_temps = 0;
+	char hwtemps[RC_REGISTER_MAX_INDEX];
+	struct temporary_allocation * ta;
+	unsigned int i, j;
+
+	memset(hwtemps, 0, sizeof(hwtemps));
+
+	rc_recompute_ips(c);
+
+	/* Pass 1: Count original temporaries. */
+	for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		for (i = 0; i < opcode->NumSrcRegs; ++i) {
+			if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+				if (inst->U.I.SrcReg[i].Index >= num_orig_temps)
+					num_orig_temps = inst->U.I.SrcReg[i].Index + 1;
+			}
+		}
+
+		if (opcode->HasDstReg) {
+			if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
+				if (inst->U.I.DstReg.Index >= num_orig_temps)
+					num_orig_temps = inst->U.I.DstReg.Index + 1;
+			}
+		}
+	}
+
+	ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
+			sizeof(struct temporary_allocation) * num_orig_temps);
+	memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
+
+	/* Pass 2: Determine original temporary lifetimes */
+	for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+		/* Instructions inside of loops need to use the ENDLOOP
+		 * instruction as their LastRead. */
+		if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+			int endloops = 1;
+			struct rc_instruction * ptr;
+			for(ptr = inst->Next;
+				ptr != &compiler->Base.Program.Instructions;
+							ptr = ptr->Next){
+				if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+					endloops++;
+				} else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
+					endloops--;
+					if (endloops <= 0) {
+						end_loop = ptr;
+						break;
+					}
+				}
+			}
+		}
+
+		if (inst == end_loop) {
+			end_loop = NULL;
+			continue;
+		}
+
+		for (i = 0; i < opcode->NumSrcRegs; ++i) {
+			if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+				ta[inst->U.I.SrcReg[i].Index].LastRead = end_loop ? end_loop : inst;
+			}
+		}
+	}
+
+	/* Pass 3: Register allocation */
+	for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		for (i = 0; i < opcode->NumSrcRegs; ++i) {
+			if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+				unsigned int orig = inst->U.I.SrcReg[i].Index;
+				inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
+
+				if (ta[orig].Allocated && inst == ta[orig].LastRead)
+					hwtemps[ta[orig].HwTemp] = 0;
+			}
+		}
+
+		if (opcode->HasDstReg) {
+			if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
+				unsigned int orig = inst->U.I.DstReg.Index;
+
+				if (!ta[orig].Allocated) {
+					for(j = 0; j < c->max_temp_regs; ++j) {
+						if (!hwtemps[j])
+							break;
+					}
+					ta[orig].Allocated = 1;
+					ta[orig].HwTemp = j;
+					hwtemps[ta[orig].HwTemp] = 1;
+				}
+
+				inst->U.I.DstReg.Index = ta[orig].HwTemp;
+			}
+		}
+	}
+}
+
+/**
+ * R3xx-R4xx vertex engine does not support the Absolute source operand modifier
+ * and the Saturate opcode modifier. Only Absolute is currently transformed.
+ */
+static int transform_nonnative_modifiers(
+	struct radeon_compiler *c,
+	struct rc_instruction *inst,
+	void* unused)
+{
+	const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
+	unsigned i;
+
+	/* Transform ABS(a) to MAX(a, -a). */
+	for (i = 0; i < opcode->NumSrcRegs; i++) {
+		if (inst->U.I.SrcReg[i].Abs) {
+			struct rc_instruction *new_inst;
+			unsigned temp;
+
+			inst->U.I.SrcReg[i].Abs = 0;
+
+			temp = rc_find_free_temporary(c);
+
+			new_inst = rc_insert_new_instruction(c, inst->Prev);
+			new_inst->U.I.Opcode = RC_OPCODE_MAX;
+			new_inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			new_inst->U.I.DstReg.Index = temp;
+			new_inst->U.I.SrcReg[0] = inst->U.I.SrcReg[i];
+			new_inst->U.I.SrcReg[1] = inst->U.I.SrcReg[i];
+			new_inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+
+			memset(&inst->U.I.SrcReg[i], 0, sizeof(inst->U.I.SrcReg[i]));
+			inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
+			inst->U.I.SrcReg[i].Index = temp;
+			inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW;
+		}
+	}
+	return 1;
+}
+
+/**
+ * Vertex engine cannot read two inputs or two constants at the same time.
+ * Introduce intermediate MOVs to temporary registers to account for this.
+ */
+static int transform_source_conflicts(
+	struct radeon_compiler *c,
+	struct rc_instruction* inst,
+	void* unused)
+{
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+	if (opcode->NumSrcRegs == 3) {
+		if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])
+		    || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) {
+			int tmpreg = rc_find_free_temporary(c);
+			struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+			inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+			inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_mov->U.I.DstReg.Index = tmpreg;
+			inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+
+			reset_srcreg(&inst->U.I.SrcReg[2]);
+			inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
+			inst->U.I.SrcReg[2].Index = tmpreg;
+		}
+	}
+
+	if (opcode->NumSrcRegs >= 2) {
+		if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) {
+			int tmpreg = rc_find_free_temporary(c);
+			struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+			inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+			inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_mov->U.I.DstReg.Index = tmpreg;
+			inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+
+			reset_srcreg(&inst->U.I.SrcReg[1]);
+			inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+			inst->U.I.SrcReg[1].Index = tmpreg;
+		}
+	}
+
+	return 1;
+}
+
+static void rc_vs_add_artificial_outputs(struct radeon_compiler *c, void *user)
+{
+	struct r300_vertex_program_compiler * compiler = (struct r300_vertex_program_compiler*)c;
+	int i;
+
+	for(i = 0; i < 32; ++i) {
+		if ((compiler->RequiredOutputs & (1 << i)) &&
+		    !(compiler->Base.Program.OutputsWritten & (1 << i))) {
+			struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+
+			inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+			inst->U.I.DstReg.Index = i;
+			inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+
+			inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT;
+			inst->U.I.SrcReg[0].Index = 0;
+			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+
+			compiler->Base.Program.OutputsWritten |= 1 << i;
+		}
+	}
+}
+
+static void dataflow_outputs_mark_used(void * userdata, void * data,
+		void (*callback)(void *, unsigned int, unsigned int))
+{
+	struct r300_vertex_program_compiler * c = userdata;
+	int i;
+
+	for(i = 0; i < 32; ++i) {
+		if (c->RequiredOutputs & (1 << i))
+			callback(data, i, RC_MASK_XYZW);
+	}
+}
+
+static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
+{
+	(void) opcode;
+	(void) reg;
+
+	return 1;
+}
+
+static void transform_negative_addressing(struct r300_vertex_program_compiler *c,
+					  struct rc_instruction *arl,
+					  struct rc_instruction *end,
+					  int min_offset)
+{
+	struct rc_instruction *inst, *add;
+	unsigned const_swizzle;
+
+	/* Transform ARL */
+	add = rc_insert_new_instruction(&c->Base, arl->Prev);
+	add->U.I.Opcode = RC_OPCODE_ADD;
+	add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	add->U.I.DstReg.Index = rc_find_free_temporary(&c->Base);
+	add->U.I.DstReg.WriteMask = RC_MASK_X;
+	add->U.I.SrcReg[0] = arl->U.I.SrcReg[0];
+	add->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+	add->U.I.SrcReg[1].Index = rc_constants_add_immediate_scalar(&c->Base.Program.Constants,
+								     min_offset, &const_swizzle);
+	add->U.I.SrcReg[1].Swizzle = const_swizzle;
+
+	arl->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+	arl->U.I.SrcReg[0].Index = add->U.I.DstReg.Index;
+	arl->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XXXX;
+
+	/* Rewrite offsets up to and excluding inst. */
+	for (inst = arl->Next; inst != end; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		for (unsigned i = 0; i < opcode->NumSrcRegs; i++)
+			if (inst->U.I.SrcReg[i].RelAddr)
+				inst->U.I.SrcReg[i].Index -= min_offset;
+	}
+}
+
+static void rc_emulate_negative_addressing(struct radeon_compiler *compiler, void *user)
+{
+	struct r300_vertex_program_compiler * c = (struct r300_vertex_program_compiler*)compiler;
+	struct rc_instruction *inst, *lastARL = NULL;
+	int min_offset = 0;
+
+	for (inst = c->Base.Program.Instructions.Next; inst != &c->Base.Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		if (inst->U.I.Opcode == RC_OPCODE_ARL) {
+			if (lastARL != NULL && min_offset < 0)
+				transform_negative_addressing(c, lastARL, inst, min_offset);
+
+			lastARL = inst;
+			min_offset = 0;
+			continue;
+		}
+
+		for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
+			if (inst->U.I.SrcReg[i].RelAddr &&
+			    inst->U.I.SrcReg[i].Index < 0) {
+				/* ARL must precede any indirect addressing. */
+				if (lastARL == NULL) {
+					rc_error(&c->Base, "Vertex shader: Found relative addressing without ARL.");
+					return;
+				}
+
+				if (inst->U.I.SrcReg[i].Index < min_offset)
+					min_offset = inst->U.I.SrcReg[i].Index;
+			}
+		}
+	}
+
+	if (lastARL != NULL && min_offset < 0)
+		transform_negative_addressing(c, lastARL, inst, min_offset);
+}
+
+static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
+	.IsNative = &swizzle_is_native,
+	.Split = 0 /* should never be called */
+};
+
+void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
+{
+	int is_r500 = c->Base.is_r500;
+	int opt = !c->Base.disable_optimizations;
+
+	/* Lists of instruction transformations. */
+	struct radeon_program_transformation alu_rewrite_r500[] = {
+		{ &r300_transform_vertex_alu, 0 },
+		{ &r300_transform_trig_scale_vertex, 0 },
+		{ 0, 0 }
+	};
+
+	struct radeon_program_transformation alu_rewrite_r300[] = {
+		{ &r300_transform_vertex_alu, 0 },
+		{ &r300_transform_trig_simple, 0 },
+		{ 0, 0 }
+	};
+
+	/* Note: These passes have to be done seperately from ALU rewrite,
+	 * otherwise non-native ALU instructions with source conflits
+	 * or non-native modifiers will not be treated properly.
+	 */
+	struct radeon_program_transformation emulate_modifiers[] = {
+		{ &transform_nonnative_modifiers, 0 },
+		{ 0, 0 }
+	};
+
+	struct radeon_program_transformation resolve_src_conflicts[] = {
+		{ &transform_source_conflicts, 0 },
+		{ 0, 0 }
+	};
+
+	/* List of compiler passes. */
+	struct radeon_compiler_pass vs_list[] = {
+		/* NAME				DUMP PREDICATE	FUNCTION			PARAM */
+		{"add artificial outputs",	0, 1,		rc_vs_add_artificial_outputs,	NULL},
+		{"transform loops",		1, 1,		rc_transform_loops,		NULL},
+		{"emulate branches",		1, !is_r500,	rc_emulate_branches,		NULL},
+		{"emulate negative addressing", 1, 1,		rc_emulate_negative_addressing,	NULL},
+		{"native rewrite",		1, is_r500,	rc_local_transform,		alu_rewrite_r500},
+		{"native rewrite",		1, !is_r500,	rc_local_transform,		alu_rewrite_r300},
+		{"emulate modifiers",		1, !is_r500,	rc_local_transform,		emulate_modifiers},
+		{"deadcode",			1, opt,		rc_dataflow_deadcode,		dataflow_outputs_mark_used},
+		{"dataflow optimize",		1, opt,		rc_optimize,			NULL},
+		/* This pass must be done after optimizations. */
+		{"source conflict resolve",	1, 1,		rc_local_transform,		resolve_src_conflicts},
+		{"register allocation",		1, opt,		allocate_temporary_registers,	NULL},
+		{"dead constants",		1, 1,		rc_remove_unused_constants,	&c->code->constants_remap_table},
+		{"final code validation",	0, 1,		rc_validate_final_shader,	NULL},
+		{"machine code generation",	0, 1,		translate_vertex_program,	NULL},
+		{"dump machine code",		0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump,	NULL},
+		{NULL, 0, 0, NULL, NULL}
+	};
+
+	c->Base.type = RC_VERTEX_PROGRAM;
+	c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
+
+	rc_run_compiler(&c->Base, vs_list);
+
+	c->code->InputsRead = c->Base.Program.InputsRead;
+	c->code->OutputsWritten = c->Base.Program.OutputsWritten;
+	rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
+}
diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c
new file mode 100644
index 00000000000..2bc0a87eed8
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+#include "radeon_code.h"
+#include "../r300_reg.h"
+
+#include <stdio.h>
+
+static char* r300_vs_ve_ops[] = {
+	/* R300 vector ops */
+	"                 VE_NO_OP",
+	"           VE_DOT_PRODUCT",
+	"              VE_MULTIPLY",
+	"                   VE_ADD",
+	"          VE_MULTIPLY_ADD",
+	"       VE_DISTANCE_FACTOR",
+	"              VE_FRACTION",
+	"               VE_MAXIMUM",
+	"               VE_MINIMUM",
+	"VE_SET_GREATER_THAN_EQUAL",
+	"         VE_SET_LESS_THAN",
+	"        VE_MULTIPLYX2_ADD",
+	"        VE_MULTIPLY_CLAMP",
+	"            VE_FLT2FIX_DX",
+	"        VE_FLT2FIX_DX_RND",
+	/* R500 vector ops */
+	"      VE_PRED_SET_EQ_PUSH",
+	"      VE_PRED_SET_GT_PUSH",
+	"     VE_PRED_SET_GTE_PUSH",
+	"     VE_PRED_SET_NEQ_PUSH",
+	"         VE_COND_WRITE_EQ",
+	"         VE_COND_WRITE_GT",
+	"        VE_COND_WRITE_GTE",
+	"        VE_COND_WRITE_NEQ",
+	"           VE_COND_MUX_EQ",
+	"           VE_COND_MUX_GT",
+	"          VE_COND_MUX_GTE",
+	"      VE_SET_GREATER_THAN",
+	"             VE_SET_EQUAL",
+	"         VE_SET_NOT_EQUAL",
+	"               (reserved)",
+	"               (reserved)",
+	"               (reserved)",
+};
+
+static char* r300_vs_me_ops[] = {
+	/* R300 math ops */
+	"                 ME_NO_OP",
+	"          ME_EXP_BASE2_DX",
+	"          ME_LOG_BASE2_DX",
+	"          ME_EXP_BASEE_FF",
+	"        ME_LIGHT_COEFF_DX",
+	"         ME_POWER_FUNC_FF",
+	"              ME_RECIP_DX",
+	"              ME_RECIP_FF",
+	"         ME_RECIP_SQRT_DX",
+	"         ME_RECIP_SQRT_FF",
+	"              ME_MULTIPLY",
+	"     ME_EXP_BASE2_FULL_DX",
+	"     ME_LOG_BASE2_FULL_DX",
+	" ME_POWER_FUNC_FF_CLAMP_B",
+	"ME_POWER_FUNC_FF_CLAMP_B1",
+	"ME_POWER_FUNC_FF_CLAMP_01",
+	"                   ME_SIN",
+	"                   ME_COS",
+	/* R500 math ops */
+	"        ME_LOG_BASE2_IEEE",
+	"            ME_RECIP_IEEE",
+	"       ME_RECIP_SQRT_IEEE",
+	"           ME_PRED_SET_EQ",
+	"           ME_PRED_SET_GT",
+	"          ME_PRED_SET_GTE",
+	"          ME_PRED_SET_NEQ",
+	"          ME_PRED_SET_CLR",
+	"          ME_PRED_SET_INV",
+	"          ME_PRED_SET_POP",
+	"      ME_PRED_SET_RESTORE",
+	"               (reserved)",
+	"               (reserved)",
+	"               (reserved)",
+};
+
+/* XXX refactor to avoid clashing symbols */
+static char* r300_vs_src_debug[] = {
+	"t",
+	"i",
+	"c",
+	"a",
+};
+
+static char* r300_vs_dst_debug[] = {
+	"t",
+	"a0",
+	"o",
+	"ox",
+	"a",
+	"i",
+	"u",
+	"u",
+};
+
+static char* r300_vs_swiz_debug[] = {
+	"X",
+	"Y",
+	"Z",
+	"W",
+	"0",
+	"1",
+	"U",
+	"U",
+};
+
+
+static void r300_vs_op_dump(uint32_t op)
+{
+	fprintf(stderr, " dst: %d%s op: ",
+			(op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]);
+	if ((op >> PVS_DST_PRED_ENABLE_SHIFT) & 0x1) {
+		fprintf(stderr, "PRED %u",
+				(op >> PVS_DST_PRED_SENSE_SHIFT) & 0x1);
+	}
+	if (op & 0x80) {
+		if (op & 0x1) {
+			fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n");
+		} else {
+			fprintf(stderr, "   PVS_MACRO_OP_2CLK_MADD\n");
+		}
+	} else if (op & 0x40) {
+		fprintf(stderr, "%s\n", r300_vs_me_ops[op & 0x1f]);
+	} else {
+		fprintf(stderr, "%s\n", r300_vs_ve_ops[op & 0x1f]);
+	}
+}
+
+static void r300_vs_src_dump(uint32_t src)
+{
+	fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n",
+			(src >> 5) & 0xff, r300_vs_src_debug[src & 0x3],
+			src & (1 << 25) ? "-" : " ",
+			r300_vs_swiz_debug[(src >> 13) & 0x7],
+			src & (1 << 26) ? "-" : " ",
+			r300_vs_swiz_debug[(src >> 16) & 0x7],
+			src & (1 << 27) ? "-" : " ",
+			r300_vs_swiz_debug[(src >> 19) & 0x7],
+			src & (1 << 28) ? "-" : " ",
+			r300_vs_swiz_debug[(src >> 22) & 0x7]);
+}
+
+void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user)
+{
+	struct r300_vertex_program_compiler *c = (struct r300_vertex_program_compiler*)compiler;
+	struct r300_vertex_program_code * vs = c->code;
+	unsigned instrcount = vs->length / 4;
+	unsigned i;
+
+	fprintf(stderr, "Final vertex program code:\n");
+
+	for(i = 0; i < instrcount; i++) {
+		unsigned offset = i*4;
+		unsigned src;
+
+		fprintf(stderr, "%d: op: 0x%08x", i, vs->body.d[offset]);
+		r300_vs_op_dump(vs->body.d[offset]);
+
+		for(src = 0; src < 3; ++src) {
+			fprintf(stderr, " src%i: 0x%08x", src, vs->body.d[offset+1+src]);
+			r300_vs_src_dump(vs->body.d[offset+1+src]);
+		}
+	}
+
+	fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops);
+	for(i = 0; i < vs->num_fc_ops; i++) {
+		switch((vs->fc_ops >> (i * 2)) & 0x3 ) {
+		case 0: fprintf(stderr, "NOP"); break;
+		case 1: fprintf(stderr, "JUMP"); break;
+		case 2: fprintf(stderr, "LOOP"); break;
+		case 3: fprintf(stderr, "JSR"); break;
+		}
+		if (c->Base.is_r500) {
+			fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x\n",
+				vs->fc_op_addrs.r500[i].uw,
+				vs->fc_op_addrs.r500[i].lw);
+		} else {
+			fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]);
+		}
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog.c b/src/gallium/drivers/r300/compiler/r500_fragprog.c
new file mode 100644
index 00000000000..cf99f5e4538
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r500_fragprog.c
@@ -0,0 +1,539 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "r500_fragprog.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler_util.h"
+#include "radeon_list.h"
+#include "radeon_variable.h"
+#include "../r300_reg.h"
+
+/**
+ * Rewrite IF instructions to use the ALU result special register.
+ */
+int r500_transform_IF(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst_if,
+	void *data)
+{
+	struct rc_variable * writer;
+	struct rc_list * writer_list, * list_ptr;
+	struct rc_list * var_list = rc_get_variables(c);
+	unsigned int generic_if = 0;
+	unsigned int alu_chan;
+
+	if (inst_if->U.I.Opcode != RC_OPCODE_IF) {
+		return 0;
+	}
+
+	writer_list = rc_variable_list_get_writers(
+			var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]);
+	if (!writer_list) {
+		generic_if = 1;
+	} else {
+
+		/* Make sure it is safe for the writers to write to
+		 * ALU Result */
+		for (list_ptr = writer_list; list_ptr;
+						list_ptr = list_ptr->Next) {
+			struct rc_instruction * inst;
+			writer = list_ptr->Item;
+			/* We are going to modify the destination register
+			 * of writer, so if it has a reader other than
+			 * inst_if (aka ReaderCount > 1) we must fall back to
+			 * our generic IF.
+			 * If the writer has a lower IP than inst_if, this
+			 * means that inst_if is above the writer in a loop.
+			 * I'm not sure why this would ever happen, but
+			 * if it does we want to make sure we fall back
+			 * to our generic IF. */
+			if (writer->ReaderCount > 1 || writer->Inst->IP < inst_if->IP) {
+				generic_if = 1;
+				break;
+			}
+
+			/* The ALU Result is not preserved across IF
+			 * instructions, so if there is another IF
+			 * instruction between writer and inst_if, then
+			 * we need to fall back to generic IF. */
+			for (inst = writer->Inst; inst != inst_if; inst = inst->Next) {
+				const struct rc_opcode_info * info =
+					rc_get_opcode_info(inst->U.I.Opcode);
+				if (info->IsFlowControl) {
+					generic_if = 1;
+					break;
+				}
+			}
+			if (generic_if) {
+				break;
+			}
+		}
+	}
+
+	if (GET_SWZ(inst_if->U.I.SrcReg[0].Swizzle, 0) == RC_SWIZZLE_X) {
+		alu_chan = RC_ALURESULT_X;
+	} else {
+		alu_chan = RC_ALURESULT_W;
+	}
+	if (generic_if) {
+		struct rc_instruction * inst_mov =
+				rc_insert_new_instruction(c, inst_if->Prev);
+
+		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+		inst_mov->U.I.DstReg.WriteMask = 0;
+		inst_mov->U.I.DstReg.File = RC_FILE_NONE;
+		inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL;
+		inst_mov->U.I.WriteALUResult = alu_chan;
+		inst_mov->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0];
+		if (alu_chan == RC_ALURESULT_X) {
+			inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(
+					inst_mov->U.I.SrcReg[0].Swizzle,
+					RC_SWIZZLE_X, RC_SWIZZLE_UNUSED,
+					RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED);
+		} else {
+			inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(
+					inst_mov->U.I.SrcReg[0].Swizzle,
+					RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
+					RC_SWIZZLE_UNUSED, RC_SWIZZLE_Z);
+		}
+	} else {
+		rc_compare_func compare_func = RC_COMPARE_FUNC_NEVER;
+		unsigned int reverse_srcs = 0;
+		unsigned int preserve_opcode = 0;
+		for (list_ptr = writer_list; list_ptr;
+						list_ptr = list_ptr->Next) {
+			writer = list_ptr->Item;
+			switch(writer->Inst->U.I.Opcode) {
+			case RC_OPCODE_SEQ:
+				compare_func = RC_COMPARE_FUNC_EQUAL;
+				break;
+			case RC_OPCODE_SNE:
+				compare_func = RC_COMPARE_FUNC_NOTEQUAL;
+				break;
+			case RC_OPCODE_SLE:
+				reverse_srcs = 1;
+				/* Fall through */
+			case RC_OPCODE_SGE:
+				compare_func = RC_COMPARE_FUNC_GEQUAL;
+				break;
+			case RC_OPCODE_SGT:
+				reverse_srcs = 1;
+				/* Fall through */
+			case RC_OPCODE_SLT:
+				compare_func = RC_COMPARE_FUNC_LESS;
+				break;
+			default:
+				compare_func = RC_COMPARE_FUNC_NOTEQUAL;
+				preserve_opcode = 1;
+				break;
+			}
+			if (!preserve_opcode) {
+				writer->Inst->U.I.Opcode = RC_OPCODE_SUB;
+			}
+			writer->Inst->U.I.DstReg.WriteMask = 0;
+			writer->Inst->U.I.DstReg.File = RC_FILE_NONE;
+			writer->Inst->U.I.WriteALUResult = alu_chan;
+			writer->Inst->U.I.ALUResultCompare = compare_func;
+			if (reverse_srcs) {
+				struct rc_src_register temp_src;
+				temp_src = writer->Inst->U.I.SrcReg[0];
+				writer->Inst->U.I.SrcReg[0] =
+					writer->Inst->U.I.SrcReg[1];
+				writer->Inst->U.I.SrcReg[1] = temp_src;
+			}
+		}
+	}
+
+	inst_if->U.I.SrcReg[0].File = RC_FILE_SPECIAL;
+	inst_if->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT;
+	inst_if->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE(
+				RC_SWIZZLE_X, RC_SWIZZLE_UNUSED,
+				RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED);
+	inst_if->U.I.SrcReg[0].Negate = 0;
+
+	return 1;
+}
+
+static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
+{
+	unsigned int relevant;
+	int i;
+
+	if (opcode == RC_OPCODE_TEX ||
+	    opcode == RC_OPCODE_TXB ||
+	    opcode == RC_OPCODE_TXP ||
+	    opcode == RC_OPCODE_TXD ||
+	    opcode == RC_OPCODE_TXL ||
+	    opcode == RC_OPCODE_KIL) {
+		if (reg.Abs)
+			return 0;
+
+		if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE))
+			return 0;
+
+		for(i = 0; i < 4; ++i) {
+			unsigned int swz = GET_SWZ(reg.Swizzle, i);
+			if (swz == RC_SWIZZLE_UNUSED) {
+				reg.Negate &= ~(1 << i);
+				continue;
+			}
+			if (swz >= 4)
+				return 0;
+		}
+
+		if (reg.Negate)
+			return 0;
+
+		return 1;
+	} else if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY) {
+		/* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles;
+		 * if it doesn't fit perfectly into a .xyzw case... */
+		if (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate)
+			return 1;
+
+		return 0;
+	} else {
+		/* ALU instructions support almost everything */
+		relevant = 0;
+		for(i = 0; i < 3; ++i) {
+			unsigned int swz = GET_SWZ(reg.Swizzle, i);
+			if (swz != RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO)
+				relevant |= 1 << i;
+		}
+		if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
+			return 0;
+
+		return 1;
+	}
+}
+
+/**
+ * Split source register access.
+ *
+ * The only thing we *cannot* do in an ALU instruction is per-component
+ * negation.
+ */
+static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask,
+		struct rc_swizzle_split * split)
+{
+	unsigned int negatebase[2] = { 0, 0 };
+	int i;
+
+	for(i = 0; i < 4; ++i) {
+		unsigned int swz = GET_SWZ(src.Swizzle, i);
+		if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i))
+			continue;
+		negatebase[GET_BIT(src.Negate, i)] |= 1 << i;
+	}
+
+	split->NumPhases = 0;
+
+	for(i = 0; i <= 1; ++i) {
+		if (!negatebase[i])
+			continue;
+
+		split->Phase[split->NumPhases++] = negatebase[i];
+	}
+}
+
+struct rc_swizzle_caps r500_swizzle_caps = {
+	.IsNative = r500_swizzle_is_native,
+	.Split = r500_swizzle_split
+};
+
+static char *toswiz(int swiz_val) {
+  switch(swiz_val) {
+  case 0: return "R";
+  case 1: return "G";
+  case 2: return "B";
+  case 3: return "A";
+  case 4: return "0";
+  case 5: return "H";
+  case 6: return "1";
+  case 7: return "U";
+  }
+  return NULL;
+}
+
+static char *toop(int op_val)
+{
+  char *str = NULL;
+  switch (op_val) {
+  case 0: str = "MAD"; break;
+  case 1: str = "DP3"; break;
+  case 2: str = "DP4"; break;
+  case 3: str = "D2A"; break;
+  case 4: str = "MIN"; break;
+  case 5: str = "MAX"; break;
+  case 6: str = "Reserved"; break;
+  case 7: str = "CND"; break;
+  case 8: str = "CMP"; break;
+  case 9: str = "FRC"; break;
+  case 10: str = "SOP"; break;
+  case 11: str = "MDH"; break;
+  case 12: str = "MDV"; break;
+  }
+  return str;
+}
+
+static char *to_alpha_op(int op_val)
+{
+  char *str = NULL;
+  switch (op_val) {
+  case 0: str = "MAD"; break;
+  case 1: str = "DP"; break;
+  case 2: str = "MIN"; break;
+  case 3: str = "MAX"; break;
+  case 4: str = "Reserved"; break;
+  case 5: str = "CND"; break;
+  case 6: str = "CMP"; break;
+  case 7: str = "FRC"; break;
+  case 8: str = "EX2"; break;
+  case 9: str = "LN2"; break;
+  case 10: str = "RCP"; break;
+  case 11: str = "RSQ"; break;
+  case 12: str = "SIN"; break;
+  case 13: str = "COS"; break;
+  case 14: str = "MDH"; break;
+  case 15: str = "MDV"; break;
+  }
+  return str;
+}
+
+static char *to_mask(int val)
+{
+  char *str = NULL;
+  switch(val) {
+  case 0: str = "NONE"; break;
+  case 1: str = "R"; break;
+  case 2: str = "G"; break;
+  case 3: str = "RG"; break;
+  case 4: str = "B"; break;
+  case 5: str = "RB"; break;
+  case 6: str = "GB"; break;
+  case 7: str = "RGB"; break;
+  case 8: str = "A"; break;
+  case 9: str = "AR"; break;
+  case 10: str = "AG"; break;
+  case 11: str = "ARG"; break;
+  case 12: str = "AB"; break;
+  case 13: str = "ARB"; break;
+  case 14: str = "AGB"; break;
+  case 15: str = "ARGB"; break;
+  }
+  return str;
+}
+
+static char *to_texop(int val)
+{
+  switch(val) {
+  case 0: return "NOP";
+  case 1: return "LD";
+  case 2: return "TEXKILL";
+  case 3: return "PROJ";
+  case 4: return "LODBIAS";
+  case 5: return "LOD";
+  case 6: return "DXDY";
+  }
+  return NULL;
+}
+
+void r500FragmentProgramDump(struct radeon_compiler *c, void *user)
+{
+  struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
+  struct r500_fragment_program_code *code = &compiler->code->code.r500;
+  int n, i;
+  uint32_t inst;
+  uint32_t inst0;
+  char *str = NULL;
+  fprintf(stderr, "R500 Fragment Program:\n--------\n");
+
+  for (n = 0; n < code->inst_end+1; n++) {
+    inst0 = inst = code->inst[n].inst0;
+    fprintf(stderr,"%d\t0:CMN_INST   0x%08x:", n, inst);
+    switch(inst & 0x3) {
+    case R500_INST_TYPE_ALU: str = "ALU"; break;
+    case R500_INST_TYPE_OUT: str = "OUT"; break;
+    case R500_INST_TYPE_FC: str = "FC"; break;
+    case R500_INST_TYPE_TEX: str = "TEX"; break;
+    };
+    fprintf(stderr,"%s %s %s %s %s ", str,
+	    inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
+	    inst & R500_INST_LAST ? "LAST" : "",
+	    inst & R500_INST_NOP ? "NOP" : "",
+	    inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
+    fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
+	    to_mask((inst >> 15) & 0xf));
+
+    switch(inst0 & 0x3) {
+    case R500_INST_TYPE_ALU:
+    case R500_INST_TYPE_OUT:
+      fprintf(stderr,"\t1:RGB_ADDR   0x%08x:", code->inst[n].inst1);
+      inst = code->inst[n].inst1;
+
+      fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
+	      inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
+	      (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
+	      (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
+	      (inst >> 30));
+
+      fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2);
+      inst = code->inst[n].inst2;
+      fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
+	      inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
+	      (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
+	      (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
+	      (inst >> 30));
+      fprintf(stderr,"\t3 RGB_INST:  0x%08x:", code->inst[n].inst3);
+      inst = code->inst[n].inst3;
+      fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n",
+	      (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
+	      (inst >> 11) & 0x3,
+	      (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
+	      (inst >> 24) & 0x3, (inst >> 29) & 0x3);
+
+
+      fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4);
+      inst = code->inst[n].inst4;
+      fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf),
+	      (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
+	      (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
+	      (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
+	      (inst >> 29) & 0x3,
+	      (inst >> 31) & 0x1);
+
+      fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5);
+      inst = code->inst[n].inst5;
+      fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
+	      (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
+	      (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
+	      (inst >> 23) & 0x3,
+	      (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
+      break;
+    case R500_INST_TYPE_FC:
+      fprintf(stderr, "\t2:FC_INST    0x%08x:", code->inst[n].inst2);
+      inst = code->inst[n].inst2;
+      /* JUMP_FUNC JUMP_ANY*/
+      fprintf(stderr, "0x%02x %1x ", inst >> 8 & 0xff,
+          (inst & R500_FC_JUMP_ANY) >> 5);
+      
+      /* OP */
+      switch(inst & 0x7){
+      case R500_FC_OP_JUMP:
+      	fprintf(stderr, "JUMP");
+        break;
+      case R500_FC_OP_LOOP:
+        fprintf(stderr, "LOOP");
+        break;
+      case R500_FC_OP_ENDLOOP:
+        fprintf(stderr, "ENDLOOP");
+        break;
+      case R500_FC_OP_REP:
+        fprintf(stderr, "REP");
+        break;
+      case R500_FC_OP_ENDREP:
+        fprintf(stderr, "ENDREP");
+        break;
+      case R500_FC_OP_BREAKLOOP:
+        fprintf(stderr, "BREAKLOOP");
+        break;
+      case R500_FC_OP_BREAKREP:
+        fprintf(stderr, "BREAKREP");
+	break;
+      case R500_FC_OP_CONTINUE:
+        fprintf(stderr, "CONTINUE");
+        break;
+      }
+      fprintf(stderr," "); 
+      /* A_OP */
+      switch(inst & (0x3 << 6)){
+      case R500_FC_A_OP_NONE:
+        fprintf(stderr, "NONE");
+        break;
+      case R500_FC_A_OP_POP:
+	fprintf(stderr, "POP");
+        break;
+      case R500_FC_A_OP_PUSH:
+        fprintf(stderr, "PUSH");
+        break;
+      }
+      /* B_OP0 B_OP1 */
+      for(i=0; i<2; i++){
+        fprintf(stderr, " ");
+        switch(inst & (0x3 << (24 + (i * 2)))){
+        /* R500_FC_B_OP0_NONE 
+	 * R500_FC_B_OP1_NONE */
+	case 0:
+          fprintf(stderr, "NONE");
+          break;
+        case R500_FC_B_OP0_DECR:
+        case R500_FC_B_OP1_DECR:
+          fprintf(stderr, "DECR");
+          break;
+        case R500_FC_B_OP0_INCR:
+        case R500_FC_B_OP1_INCR:
+          fprintf(stderr, "INCR");
+          break;
+        }
+      }
+      /*POP_CNT B_ELSE */
+      fprintf(stderr, " %d %1x", (inst >> 16) & 0x1f, (inst & R500_FC_B_ELSE) >> 4);
+      inst = code->inst[n].inst3;
+      /* JUMP_ADDR */
+      fprintf(stderr, " %d", inst >> 16);
+      
+      if(code->inst[n].inst2 & R500_FC_IGNORE_UNCOVERED){
+        fprintf(stderr, " IGN_UNC");
+      }
+      inst = code->inst[n].inst3;
+      fprintf(stderr, "\n\t3:FC_ADDR    0x%08x:", inst);
+      fprintf(stderr, "BOOL: 0x%02x, INT: 0x%02x, JUMP_ADDR: %d, JMP_GLBL: %1x\n",
+      inst & 0x1f, (inst >> 8) & 0x1f, (inst >> 16) & 0x1ff, inst >> 31); 
+      break;
+    case R500_INST_TYPE_TEX:
+      inst = code->inst[n].inst1;
+      fprintf(stderr,"\t1:TEX_INST:  0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
+	      to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
+	      (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
+      inst = code->inst[n].inst2;
+      fprintf(stderr,"\t2:TEX_ADDR:  0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
+	      inst & 127, inst & (1<<7) ? "(rel)" : "",
+	      toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
+	      toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
+	      (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
+	      toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
+	      toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
+
+      fprintf(stderr,"\t3:TEX_DXDY:  0x%08x\n", code->inst[n].inst3);
+      break;
+    }
+    fprintf(stderr,"\n");
+  }
+
+}
diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog.h b/src/gallium/drivers/r300/compiler/r500_fragprog.h
new file mode 100644
index 00000000000..6aa448cc6f7
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r500_fragprog.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ *   Ben Skeggs <darktama@iinet.net.au>
+ *   Jerome Glisse <j.glisse@gmail.com>
+ */
+#ifndef __R500_FRAGPROG_H_
+#define __R500_FRAGPROG_H_
+
+#include "radeon_compiler.h"
+#include "radeon_swizzle.h"
+
+extern void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user);
+
+extern void r500FragmentProgramDump(struct radeon_compiler *c, void *user);
+
+extern struct rc_swizzle_caps r500_swizzle_caps;
+
+extern int r500_transform_IF(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst_if,
+	void* data);
+
+#endif
diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c b/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c
new file mode 100644
index 00000000000..c30cd753d15
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c
@@ -0,0 +1,678 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ *
+ * \author Ben Skeggs <darktama@iinet.net.au>
+ *
+ * \author Jerome Glisse <j.glisse@gmail.com>
+ *
+ * \author Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ */
+
+#include "r500_fragprog.h"
+
+#include "../r300_reg.h"
+
+#include "radeon_program_pair.h"
+
+#define PROG_CODE \
+	struct r500_fragment_program_code *code = &c->code->code.r500
+
+#define error(fmt, args...) do {			\
+		rc_error(&c->Base, "%s::%s(): " fmt "\n",	\
+			__FILE__, __FUNCTION__, ##args);	\
+	} while(0)
+
+
+struct branch_info {
+	int If;
+	int Else;
+	int Endif;
+};
+
+struct r500_loop_info {
+	int BgnLoop;
+
+	int BranchDepth;
+	int * Brks;
+	int BrkCount;
+	int BrkReserved;
+
+	int * Conts;
+	int ContCount;
+	int ContReserved;
+};
+
+struct emit_state {
+	struct radeon_compiler * C;
+	struct r500_fragment_program_code * Code;
+
+	struct branch_info * Branches;
+	unsigned int CurrentBranchDepth;
+	unsigned int BranchesReserved;
+
+	struct r500_loop_info * Loops;
+	unsigned int CurrentLoopDepth;
+	unsigned int LoopsReserved;
+
+	unsigned int MaxBranchDepth;
+
+};
+
+static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
+{
+	switch(opcode) {
+	case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
+	case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND;
+	case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
+	case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
+	case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
+	case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
+	case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
+	default:
+		error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
+		/* fall through */
+	case RC_OPCODE_NOP:
+		/* fall through */
+	case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
+	case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
+	case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
+	case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
+	}
+}
+
+static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
+{
+	switch(opcode) {
+	case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
+	case RC_OPCODE_CND: return R500_ALPHA_OP_CND;
+	case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
+	case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
+	case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
+	case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;
+	case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;
+	case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2;
+	case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;
+	case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;
+	default:
+		error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
+		/* fall through */
+	case RC_OPCODE_NOP:
+		/* fall through */
+	case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;
+	case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;
+	case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;
+	case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;
+	case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
+	case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;
+	}
+}
+
+static unsigned int fix_hw_swizzle(unsigned int swz)
+{
+    switch (swz) {
+        case RC_SWIZZLE_ZERO:
+        case RC_SWIZZLE_UNUSED:
+            swz = 4;
+            break;
+        case RC_SWIZZLE_HALF:
+            swz = 5;
+            break;
+        case RC_SWIZZLE_ONE:
+            swz = 6;
+            break;
+    }
+
+	return swz;
+}
+
+static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
+{
+	unsigned int t = inst->RGB.Arg[arg].Source;
+	int comp;
+	t |= inst->RGB.Arg[arg].Negate << 11;
+	t |= inst->RGB.Arg[arg].Abs << 12;
+
+	for(comp = 0; comp < 3; ++comp)
+		t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
+
+	return t;
+}
+
+static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
+{
+	unsigned int t = inst->Alpha.Arg[i].Source;
+	t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2;
+	t |= inst->Alpha.Arg[i].Negate << 5;
+	t |= inst->Alpha.Arg[i].Abs << 6;
+	return t;
+}
+
+static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
+{
+	switch(func) {
+	case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ;
+	case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT;
+	case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE;
+	case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE;
+	default:
+		rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
+		return 0;
+	}
+}
+
+static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
+{
+	if (index > code->max_temp_idx)
+		code->max_temp_idx = index;
+}
+
+static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
+{
+	/* From docs:
+	 *   Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
+	 * MSB = 1 << 7 */
+	if (!src.Used)
+		return 1 << 7;
+
+	if (src.File == RC_FILE_CONSTANT) {
+		return src.Index | R500_RGB_ADDR0_CONST;
+	} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
+		use_temporary(code, src.Index);
+		return src.Index;
+	}
+
+	return 0;
+}
+
+/**
+ * NOP the specified instruction if it is not a texture lookup.
+ */
+static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
+{
+	PROG_CODE;
+
+	if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
+		code->inst[ip].inst0 |= R500_INST_NOP;
+	}
+}
+
+/**
+ * Emit a paired ALU instruction.
+ */
+static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
+{
+	int ip;
+	PROG_CODE;
+
+	if (code->inst_end >= c->Base.max_alu_insts-1) {
+		error("emit_alu: Too many instructions");
+		return;
+	}
+
+	ip = ++code->inst_end;
+
+	/* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
+	if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
+		inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
+		if (ip > 0) {
+			alu_nop(c, ip - 1);
+		}
+	}
+
+	code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
+	code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
+
+	if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
+		code->inst[ip].inst0 = R500_INST_TYPE_OUT;
+		if (inst->WriteALUResult) {
+			error("Cannot write output and ALU result at the same time");
+			return;
+		}
+	} else {
+		code->inst[ip].inst0 = R500_INST_TYPE_ALU;
+	}
+	code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
+
+	code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
+	code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
+	code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
+	if (inst->Nop) {
+		code->inst[ip].inst0 |= R500_INST_NOP;
+	}
+	if (inst->Alpha.DepthWriteMask) {
+		code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
+		c->code->writes_depth = 1;
+	}
+
+	code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
+	code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
+	use_temporary(code, inst->Alpha.DestIndex);
+	use_temporary(code, inst->RGB.DestIndex);
+
+	if (inst->RGB.Saturate)
+		code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
+	if (inst->Alpha.Saturate)
+		code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
+
+	/* Set the presubtract operation. */
+	switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
+		case RC_PRESUB_BIAS:
+			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
+			break;
+		case RC_PRESUB_SUB:
+			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
+			break;
+		case RC_PRESUB_ADD:
+			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
+			break;
+		case RC_PRESUB_INV:
+			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
+			break;
+		default:
+			break;
+	}
+	switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
+		case RC_PRESUB_BIAS:
+			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
+			break;
+		case RC_PRESUB_SUB:
+			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
+			break;
+		case RC_PRESUB_ADD:
+			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
+			break;
+		case RC_PRESUB_INV:
+			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
+			break;
+		default:
+			break;
+	}
+
+	code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
+	code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
+	code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
+
+	code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
+	code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
+	code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
+
+	code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
+	code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
+	code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
+
+	code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
+	code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
+	code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
+
+	code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
+	code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
+
+	if (inst->WriteALUResult) {
+		code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
+
+		if (inst->WriteALUResult == RC_ALURESULT_X)
+			code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
+		else
+			code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
+
+		code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
+	}
+}
+
+static unsigned int translate_strq_swizzle(unsigned int swizzle)
+{
+	unsigned int swiz = 0;
+	int i;
+	for (i = 0; i < 4; i++)
+		swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2;
+	return swiz;
+}
+
+/**
+ * Emit a single TEX instruction
+ */
+static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
+{
+	int ip;
+	PROG_CODE;
+
+	if (code->inst_end >= c->Base.max_alu_insts-1) {
+		error("emit_tex: Too many instructions");
+		return 0;
+	}
+
+	ip = ++code->inst_end;
+
+	code->inst[ip].inst0 = R500_INST_TYPE_TEX
+		| (inst->DstReg.WriteMask << 11)
+		| R500_INST_TEX_SEM_WAIT;
+	code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
+		| R500_TEX_SEM_ACQUIRE;
+
+	if (inst->TexSrcTarget == RC_TEXTURE_RECT)
+		code->inst[ip].inst1 |= R500_TEX_UNSCALED;
+
+	switch (inst->Opcode) {
+	case RC_OPCODE_KIL:
+		code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
+		break;
+	case RC_OPCODE_TEX:
+		code->inst[ip].inst1 |= R500_TEX_INST_LD;
+		break;
+	case RC_OPCODE_TXB:
+		code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
+		break;
+	case RC_OPCODE_TXP:
+		code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
+		break;
+	case RC_OPCODE_TXD:
+		code->inst[ip].inst1 |= R500_TEX_INST_DXDY;
+		break;
+	case RC_OPCODE_TXL:
+		code->inst[ip].inst1 |= R500_TEX_INST_LOD;
+		break;
+	default:
+		error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
+	}
+
+	use_temporary(code, inst->SrcReg[0].Index);
+	if (inst->Opcode != RC_OPCODE_KIL)
+		use_temporary(code, inst->DstReg.Index);
+
+	code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
+		| (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
+		| R500_TEX_DST_ADDR(inst->DstReg.Index)
+		| (GET_SWZ(inst->TexSwizzle, 0) << 24)
+		| (GET_SWZ(inst->TexSwizzle, 1) << 26)
+		| (GET_SWZ(inst->TexSwizzle, 2) << 28)
+		| (GET_SWZ(inst->TexSwizzle, 3) << 30)
+		;
+
+	if (inst->Opcode == RC_OPCODE_TXD) {
+		use_temporary(code, inst->SrcReg[1].Index);
+		use_temporary(code, inst->SrcReg[2].Index);
+
+		/* DX and DY parameters are specified in a separate register. */
+		code->inst[ip].inst3 =
+			R500_DX_ADDR(inst->SrcReg[1].Index) |
+			(translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
+			R500_DY_ADDR(inst->SrcReg[2].Index) |
+			(translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);
+	}
+
+	return 1;
+}
+
+static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
+{
+	unsigned int newip;
+
+	if (s->Code->inst_end >= s->C->max_alu_insts-1) {
+		rc_error(s->C, "emit_tex: Too many instructions");
+		return;
+	}
+
+	newip = ++s->Code->inst_end;
+
+	/* Currently all loops use the same integer constant to intialize
+	 * the loop variables. */
+	if(!s->Code->int_constants[0]) {
+		s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
+		s->Code->int_constant_count = 1;
+	}
+	s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
+
+	switch(inst->U.I.Opcode){
+	struct branch_info * branch;
+	struct r500_loop_info * loop;
+	case RC_OPCODE_BGNLOOP:
+		memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,
+			s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
+
+		loop = &s->Loops[s->CurrentLoopDepth++];
+		memset(loop, 0, sizeof(struct r500_loop_info));
+		loop->BranchDepth = s->CurrentBranchDepth;
+		loop->BgnLoop = newip;
+
+		s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
+			| R500_FC_JUMP_FUNC(0x00)
+			| R500_FC_IGNORE_UNCOVERED
+			;
+		break;
+	case RC_OPCODE_BRK:
+		loop = &s->Loops[s->CurrentLoopDepth - 1];
+		memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
+					loop->BrkCount, loop->BrkReserved, 1);
+
+		loop->Brks[loop->BrkCount++] = newip;
+		s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
+			| R500_FC_JUMP_FUNC(0xff)
+			| R500_FC_B_OP1_DECR
+			| R500_FC_B_POP_CNT(
+				s->CurrentBranchDepth - loop->BranchDepth)
+			| R500_FC_IGNORE_UNCOVERED
+			;
+		break;
+
+	case RC_OPCODE_CONT:
+		loop = &s->Loops[s->CurrentLoopDepth - 1];
+		memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
+					loop->ContCount, loop->ContReserved, 1);
+		loop->Conts[loop->ContCount++] = newip;
+		s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
+			| R500_FC_JUMP_FUNC(0xff)
+			| R500_FC_B_OP1_DECR
+			| R500_FC_B_POP_CNT(
+				s->CurrentBranchDepth -	loop->BranchDepth)
+			| R500_FC_IGNORE_UNCOVERED
+			;
+		break;
+
+	case RC_OPCODE_ENDLOOP:
+	{
+		loop = &s->Loops[s->CurrentLoopDepth - 1];
+		/* Emit ENDLOOP */
+		s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
+			| R500_FC_JUMP_FUNC(0xff)
+			| R500_FC_JUMP_ANY
+			| R500_FC_IGNORE_UNCOVERED
+			;
+		/* The constant integer at index 0 is used by all loops. */
+		s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
+			| R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
+			;
+
+		/* Set jump address and int constant for BGNLOOP */
+		s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
+			| R500_FC_JUMP_ADDR(newip)
+			;
+
+		/* Set jump address for the BRK instructions. */
+		while(loop->BrkCount--) {
+			s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
+						R500_FC_JUMP_ADDR(newip + 1);
+		}
+
+		/* Set jump address for CONT instructions. */
+		while(loop->ContCount--) {
+			s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
+						R500_FC_JUMP_ADDR(newip);
+		}
+		s->CurrentLoopDepth--;
+		break;
+	}
+	case RC_OPCODE_IF:
+		if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
+			rc_error(s->C, "Branch depth exceeds hardware limit");
+			return;
+		}
+		memory_pool_array_reserve(&s->C->Pool, struct branch_info,
+				s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
+
+		branch = &s->Branches[s->CurrentBranchDepth++];
+		branch->If = newip;
+		branch->Else = -1;
+		branch->Endif = -1;
+
+		if (s->CurrentBranchDepth > s->MaxBranchDepth)
+			s->MaxBranchDepth = s->CurrentBranchDepth;
+
+		/* actual instruction is filled in at ENDIF time */
+		break;
+	
+	case RC_OPCODE_ELSE:
+		if (!s->CurrentBranchDepth) {
+			rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
+			return;
+		}
+
+		branch = &s->Branches[s->CurrentBranchDepth - 1];
+		branch->Else = newip;
+
+		/* actual instruction is filled in at ENDIF time */
+		break;
+
+	case RC_OPCODE_ENDIF:
+		if (!s->CurrentBranchDepth) {
+			rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
+			return;
+		}
+
+		branch = &s->Branches[s->CurrentBranchDepth - 1];
+		branch->Endif = newip;
+
+		s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
+			| R500_FC_A_OP_NONE /* no address stack */
+			| R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
+			| R500_FC_B_OP0_DECR /* decrement branch counter if stay */
+			| R500_FC_B_OP1_NONE /* no branch counter if stay */
+			| R500_FC_B_POP_CNT(1)
+			;
+		s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+		s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
+			| R500_FC_A_OP_NONE /* no address stack */
+			| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
+			| R500_FC_B_OP0_INCR /* increment branch counter if stay */
+			| R500_FC_IGNORE_UNCOVERED
+		;
+
+		if (branch->Else >= 0) {
+			/* increment branch counter also if jump */
+			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
+			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
+
+			s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
+				| R500_FC_A_OP_NONE /* no address stack */
+				| R500_FC_B_ELSE /* all active pixels want to jump */
+				| R500_FC_B_OP0_NONE /* no counter op if stay */
+				| R500_FC_B_OP1_DECR /* decrement branch counter if jump */
+				| R500_FC_B_POP_CNT(1)
+			;
+			s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+		} else {
+			/* don't touch branch counter on jump */
+			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
+			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+		}
+
+
+		s->CurrentBranchDepth--;
+		break;
+	default:
+		rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
+	}
+}
+
+void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
+{
+	struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
+	struct emit_state s;
+	struct r500_fragment_program_code *code = &compiler->code->code.r500;
+
+	memset(&s, 0, sizeof(s));
+	s.C = &compiler->Base;
+	s.Code = code;
+
+	memset(code, 0, sizeof(*code));
+	code->max_temp_idx = 1;
+	code->inst_end = -1;
+
+	for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
+	    inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
+	    inst = inst->Next) {
+		if (inst->Type == RC_INSTRUCTION_NORMAL) {
+			const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+			if (opcode->IsFlowControl) {
+				emit_flowcontrol(&s, inst);
+			} else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
+				continue;
+			} else {
+				emit_tex(compiler, &inst->U.I);
+			}
+		} else {
+			emit_paired(compiler, &inst->U.P);
+		}
+	}
+
+	if (code->max_temp_idx >= compiler->Base.max_temp_regs)
+		rc_error(&compiler->Base, "Too many hardware temporaries used");
+
+	if (compiler->Base.Error)
+		return;
+
+	if (code->inst_end == -1 ||
+	    (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
+		int ip;
+
+		/* This may happen when dead-code elimination is disabled or
+		 * when most of the fragment program logic is leading to a KIL */
+		if (code->inst_end >= compiler->Base.max_alu_insts-1) {
+			rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
+			return;
+		}
+
+		ip = ++code->inst_end;
+		code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
+	}
+
+	/* Enable full flow control mode if we are using loops or have if
+	 * statements nested at least four deep. */
+	if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
+		if (code->max_temp_idx < 1)
+			code->max_temp_idx = 1;
+
+		code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_code.c b/src/gallium/drivers/r300/compiler/radeon_code.c
new file mode 100644
index 00000000000..6842fb873bc
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_code.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_code.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "radeon_program.h"
+
+void rc_constants_init(struct rc_constant_list * c)
+{
+	memset(c, 0, sizeof(*c));
+}
+
+/**
+ * Copy a constants structure, assuming that the destination structure
+ * is not initialized.
+ */
+void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src)
+{
+	dst->Constants = malloc(sizeof(struct rc_constant) * src->Count);
+	memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count);
+	dst->Count = src->Count;
+	dst->_Reserved = src->Count;
+}
+
+void rc_constants_destroy(struct rc_constant_list * c)
+{
+	free(c->Constants);
+	memset(c, 0, sizeof(*c));
+}
+
+unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant)
+{
+	unsigned index = c->Count;
+
+	if (c->Count >= c->_Reserved) {
+		struct rc_constant * newlist;
+
+		c->_Reserved = c->_Reserved * 2;
+		if (!c->_Reserved)
+			c->_Reserved = 16;
+
+		newlist = malloc(sizeof(struct rc_constant) * c->_Reserved);
+		memcpy(newlist, c->Constants, sizeof(struct rc_constant) * c->Count);
+
+		free(c->Constants);
+		c->Constants = newlist;
+	}
+
+	c->Constants[index] = *constant;
+	c->Count++;
+
+	return index;
+}
+
+
+/**
+ * Add a state vector to the constant list, while trying to avoid duplicates.
+ */
+unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1)
+{
+	unsigned index;
+	struct rc_constant constant;
+
+	for(index = 0; index < c->Count; ++index) {
+		if (c->Constants[index].Type == RC_CONSTANT_STATE) {
+			if (c->Constants[index].u.State[0] == state0 &&
+			    c->Constants[index].u.State[1] == state1)
+				return index;
+		}
+	}
+
+	memset(&constant, 0, sizeof(constant));
+	constant.Type = RC_CONSTANT_STATE;
+	constant.Size = 4;
+	constant.u.State[0] = state0;
+	constant.u.State[1] = state1;
+
+	return rc_constants_add(c, &constant);
+}
+
+
+/**
+ * Add an immediate vector to the constant list, while trying to avoid
+ * duplicates.
+ */
+unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data)
+{
+	unsigned index;
+	struct rc_constant constant;
+
+	for(index = 0; index < c->Count; ++index) {
+		if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
+			if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4))
+				return index;
+		}
+	}
+
+	memset(&constant, 0, sizeof(constant));
+	constant.Type = RC_CONSTANT_IMMEDIATE;
+	constant.Size = 4;
+	memcpy(constant.u.Immediate, data, sizeof(float) * 4);
+
+	return rc_constants_add(c, &constant);
+}
+
+
+/**
+ * Add an immediate scalar to the constant list, while trying to avoid
+ * duplicates.
+ */
+unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle)
+{
+	unsigned index;
+	int free_index = -1;
+	struct rc_constant constant;
+
+	for(index = 0; index < c->Count; ++index) {
+		if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
+			unsigned comp;
+			for(comp = 0; comp < c->Constants[index].Size; ++comp) {
+				if (c->Constants[index].u.Immediate[comp] == data) {
+					*swizzle = RC_MAKE_SWIZZLE_SMEAR(comp);
+					return index;
+				}
+			}
+
+			if (c->Constants[index].Size < 4)
+				free_index = index;
+		}
+	}
+
+	if (free_index >= 0) {
+		unsigned comp = c->Constants[free_index].Size++;
+		c->Constants[free_index].u.Immediate[comp] = data;
+		*swizzle = RC_MAKE_SWIZZLE_SMEAR(comp);
+		return free_index;
+	}
+
+	memset(&constant, 0, sizeof(constant));
+	constant.Type = RC_CONSTANT_IMMEDIATE;
+	constant.Size = 1;
+	constant.u.Immediate[0] = data;
+	*swizzle = RC_SWIZZLE_XXXX;
+
+	return rc_constants_add(c, &constant);
+}
+
+void rc_constants_print(struct rc_constant_list * c)
+{
+	unsigned int i;
+	for(i = 0; i < c->Count; i++) {
+		if (c->Constants[i].Type == RC_CONSTANT_IMMEDIATE) {
+			float * values = c->Constants[i].u.Immediate;
+			fprintf(stderr, "CONST[%u] = "
+				"{ %10.4f %10.4f %10.4f %10.4f }\n",
+				i, values[0],values[1], values[2], values[3]);
+		}
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_code.h b/src/gallium/drivers/r300/compiler/radeon_code.h
new file mode 100644
index 00000000000..67e6acf8b10
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_code.h
@@ -0,0 +1,306 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_CODE_H
+#define RADEON_CODE_H
+
+#include <stdint.h>
+
+#define R300_PFS_MAX_ALU_INST     64
+#define R300_PFS_MAX_TEX_INST     32
+#define R300_PFS_MAX_TEX_INDIRECT 4
+#define R300_PFS_NUM_TEMP_REGS    32
+#define R300_PFS_NUM_CONST_REGS   32
+
+#define R400_PFS_MAX_ALU_INST     512
+#define R400_PFS_MAX_TEX_INST     512
+
+#define R500_PFS_MAX_INST         512
+#define R500_PFS_NUM_TEMP_REGS    128
+#define R500_PFS_NUM_CONST_REGS   256
+#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32
+#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4
+
+
+#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
+
+enum {
+	/**
+	 * External constants are constants whose meaning is unknown to this
+	 * compiler. For example, a Mesa gl_program's constants are turned
+	 * into external constants.
+	 */
+	RC_CONSTANT_EXTERNAL = 0,
+
+	RC_CONSTANT_IMMEDIATE,
+
+	/**
+	 * Constant referring to state that is known by this compiler,
+	 * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state.
+	 */
+	RC_CONSTANT_STATE
+};
+
+enum {
+	RC_STATE_SHADOW_AMBIENT = 0,
+
+	RC_STATE_R300_WINDOW_DIMENSION,
+	RC_STATE_R300_TEXRECT_FACTOR,
+	RC_STATE_R300_TEXSCALE_FACTOR,
+	RC_STATE_R300_VIEWPORT_SCALE,
+	RC_STATE_R300_VIEWPORT_OFFSET
+};
+
+struct rc_constant {
+	unsigned Type:2; /**< RC_CONSTANT_xxx */
+	unsigned Size:3;
+
+	union {
+		unsigned External;
+		float Immediate[4];
+		unsigned State[2];
+	} u;
+};
+
+struct rc_constant_list {
+	struct rc_constant * Constants;
+	unsigned Count;
+
+	unsigned _Reserved;
+};
+
+void rc_constants_init(struct rc_constant_list * c);
+void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src);
+void rc_constants_destroy(struct rc_constant_list * c);
+unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant);
+unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2);
+unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data);
+unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle);
+void rc_constants_print(struct rc_constant_list * c);
+
+/**
+ * Compare functions.
+ *
+ * \note By design, RC_COMPARE_FUNC_xxx + GL_NEVER gives you
+ * the correct GL compare function.
+ */
+typedef enum {
+	RC_COMPARE_FUNC_NEVER = 0,
+	RC_COMPARE_FUNC_LESS,
+	RC_COMPARE_FUNC_EQUAL,
+	RC_COMPARE_FUNC_LEQUAL,
+	RC_COMPARE_FUNC_GREATER,
+	RC_COMPARE_FUNC_NOTEQUAL,
+	RC_COMPARE_FUNC_GEQUAL,
+	RC_COMPARE_FUNC_ALWAYS
+} rc_compare_func;
+
+/**
+ * Coordinate wrapping modes.
+ *
+ * These are not quite the same as their GL counterparts yet.
+ */
+typedef enum {
+	RC_WRAP_NONE = 0,
+	RC_WRAP_REPEAT,
+	RC_WRAP_MIRRORED_REPEAT,
+	RC_WRAP_MIRRORED_CLAMP
+} rc_wrap_mode;
+
+/**
+ * Stores state that influences the compilation of a fragment program.
+ */
+struct r300_fragment_program_external_state {
+	struct {
+		/**
+		 * This field contains swizzle for some lowering passes
+		 * (shadow comparison, unorm->snorm conversion)
+		 */
+		unsigned texture_swizzle:12;
+
+		/**
+		 * If the sampler is used as a shadow sampler,
+		 * this field specifies the compare function.
+		 *
+		 * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0).
+		 * \sa rc_compare_func
+		 */
+		unsigned texture_compare_func : 3;
+
+		/**
+		 * No matter what the sampler type is,
+		 * this field turns it into a shadow sampler.
+		 */
+		unsigned compare_mode_enabled : 1;
+
+		/**
+		 * If the sampler will receive non-normalized coords,
+		 * this field is set. The scaling factor is given by
+		 * RC_STATE_R300_TEXRECT_FACTOR.
+		 */
+		unsigned non_normalized_coords : 1;
+
+		/**
+		 * This field specifies wrapping modes for the sampler.
+		 *
+		 * If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths
+		 * will be performed on the coordinates.
+		 */
+		unsigned wrap_mode : 3;
+
+		/**
+		 * The coords are scaled after applying the wrap mode emulation
+		 * and right before texture fetch. The scaling factor is given by
+		 * RC_STATE_R300_TEXSCALE_FACTOR. */
+		unsigned clamp_and_scale_before_fetch : 1;
+
+		/**
+		 * Fetch RGTC1_SNORM or LATC1_SNORM as UNORM and convert UNORM -> SNORM
+		 * in the shader.
+		 */
+		unsigned convert_unorm_to_snorm:1;
+	} unit[16];
+
+	unsigned frag_clamp:1;
+};
+
+
+
+struct r300_fragment_program_node {
+	int tex_offset; /**< first tex instruction */
+	int tex_end; /**< last tex instruction, relative to tex_offset */
+	int alu_offset; /**< first ALU instruction */
+	int alu_end; /**< last ALU instruction, relative to alu_offset */
+	int flags;
+};
+
+/**
+ * Stores an R300 fragment program in its compiled-to-hardware form.
+ */
+struct r300_fragment_program_code {
+	struct {
+		unsigned int length; /**< total # of texture instructions used */
+		uint32_t inst[R400_PFS_MAX_TEX_INST];
+	} tex;
+
+	struct {
+		unsigned int length; /**< total # of ALU instructions used */
+		struct {
+			uint32_t rgb_inst;
+			uint32_t rgb_addr;
+			uint32_t alpha_inst;
+			uint32_t alpha_addr;
+			uint32_t r400_ext_addr;
+		} inst[R400_PFS_MAX_ALU_INST];
+	} alu;
+
+	uint32_t config; /* US_CONFIG */
+	uint32_t pixsize; /* US_PIXSIZE */
+	uint32_t code_offset; /* US_CODE_OFFSET */
+	uint32_t r400_code_offset_ext; /* US_CODE_EXT */
+	uint32_t code_addr[4]; /* US_CODE_ADDR */
+	/*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries
+	 * for r400 cards */
+	unsigned int r390_mode:1;
+};
+
+
+struct r500_fragment_program_code {
+	struct {
+		uint32_t inst0;
+		uint32_t inst1;
+		uint32_t inst2;
+		uint32_t inst3;
+		uint32_t inst4;
+		uint32_t inst5;
+	} inst[R500_PFS_MAX_INST];
+
+	int inst_end; /* Number of instructions - 1; also, last instruction to be executed */
+
+	int max_temp_idx;
+
+	uint32_t us_fc_ctrl;
+
+	uint32_t int_constants[32];
+	uint32_t int_constant_count;
+};
+
+struct rX00_fragment_program_code {
+	union {
+		struct r300_fragment_program_code r300;
+		struct r500_fragment_program_code r500;
+	} code;
+
+	unsigned writes_depth:1;
+
+	struct rc_constant_list constants;
+	unsigned *constants_remap_table;
+};
+
+
+#define R300_VS_MAX_ALU		256
+#define R300_VS_MAX_ALU_DWORDS  (R300_VS_MAX_ALU * 4)
+#define R500_VS_MAX_ALU	        1024
+#define R500_VS_MAX_ALU_DWORDS  (R500_VS_MAX_ALU * 4)
+#define R300_VS_MAX_TEMPS	32
+/* This is the max for all chipsets (r300-r500) */
+#define R300_VS_MAX_FC_OPS 16
+/* The r500 maximum depth is not just for loops, but any combination of loops
+ * and subroutine jumps. */
+#define R500_VS_MAX_FC_DEPTH 8
+#define R300_VS_MAX_LOOP_DEPTH 1
+
+#define VSF_MAX_INPUTS 32
+#define VSF_MAX_OUTPUTS 32
+
+struct r300_vertex_program_code {
+	int length;
+	union {
+		uint32_t d[R500_VS_MAX_ALU_DWORDS];
+		float f[R500_VS_MAX_ALU_DWORDS];
+	} body;
+
+	int pos_end;
+	int num_temporaries;	/* Number of temp vars used by program */
+	int inputs[VSF_MAX_INPUTS];
+	int outputs[VSF_MAX_OUTPUTS];
+
+	struct rc_constant_list constants;
+	unsigned *constants_remap_table;
+
+	uint32_t InputsRead;
+	uint32_t OutputsWritten;
+
+	unsigned int num_fc_ops;
+	uint32_t fc_ops;
+	union {
+	        uint32_t r300[R300_VS_MAX_FC_OPS];
+		struct {
+			uint32_t lw;
+			uint32_t uw;
+		} r500[R300_VS_MAX_FC_OPS];
+	} fc_op_addrs;
+	int32_t fc_loop_index[R300_VS_MAX_FC_OPS];
+};
+
+#endif /* RADEON_CODE_H */
+
diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.c b/src/gallium/drivers/r300/compiler/radeon_compiler.c
new file mode 100644
index 00000000000..b7936725d85
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_compiler.c
@@ -0,0 +1,489 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "radeon_dataflow.h"
+#include "radeon_program.h"
+#include "radeon_program_pair.h"
+#include "radeon_compiler_util.h"
+
+
+void rc_init(struct radeon_compiler * c)
+{
+	memset(c, 0, sizeof(*c));
+
+	memory_pool_init(&c->Pool);
+	c->Program.Instructions.Prev = &c->Program.Instructions;
+	c->Program.Instructions.Next = &c->Program.Instructions;
+	c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
+}
+
+void rc_destroy(struct radeon_compiler * c)
+{
+	rc_constants_destroy(&c->Program.Constants);
+	memory_pool_destroy(&c->Pool);
+	free(c->ErrorMsg);
+}
+
+void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
+{
+	va_list ap;
+
+	if (!(c->Debug & RC_DBG_LOG))
+		return;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+}
+
+void rc_error(struct radeon_compiler * c, const char * fmt, ...)
+{
+	va_list ap;
+
+	c->Error = 1;
+
+	if (!c->ErrorMsg) {
+		/* Only remember the first error */
+		char buf[1024];
+		int written;
+
+		va_start(ap, fmt);
+		written = vsnprintf(buf, sizeof(buf), fmt, ap);
+		va_end(ap);
+
+		if (written < sizeof(buf)) {
+			c->ErrorMsg = strdup(buf);
+		} else {
+			c->ErrorMsg = malloc(written + 1);
+
+			va_start(ap, fmt);
+			vsnprintf(c->ErrorMsg, written + 1, fmt, ap);
+			va_end(ap);
+		}
+	}
+
+	if (c->Debug & RC_DBG_LOG) {
+		fprintf(stderr, "r300compiler error: ");
+
+		va_start(ap, fmt);
+		vfprintf(stderr, fmt, ap);
+		va_end(ap);
+	}
+}
+
+int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
+{
+	rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion);
+	return 1;
+}
+
+/**
+ * Recompute c->Program.InputsRead and c->Program.OutputsWritten
+ * based on which inputs and outputs are actually referenced
+ * in program instructions.
+ */
+void rc_calculate_inputs_outputs(struct radeon_compiler * c)
+{
+	struct rc_instruction *inst;
+
+	c->Program.InputsRead = 0;
+	c->Program.OutputsWritten = 0;
+
+	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
+	{
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+		int i;
+
+		for (i = 0; i < opcode->NumSrcRegs; ++i) {
+			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
+				c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index;
+		}
+
+		if (opcode->HasDstReg) {
+			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
+				c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index;
+		}
+	}
+}
+
+/**
+ * Rewrite the program such that everything that source the given input
+ * register will source new_input instead.
+ */
+void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input)
+{
+	struct rc_instruction * inst;
+
+	c->Program.InputsRead &= ~(1 << input);
+
+	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+		unsigned i;
+
+		for(i = 0; i < opcode->NumSrcRegs; ++i) {
+			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) {
+				inst->U.I.SrcReg[i].File = new_input.File;
+				inst->U.I.SrcReg[i].Index = new_input.Index;
+				inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle);
+				if (!inst->U.I.SrcReg[i].Abs) {
+					inst->U.I.SrcReg[i].Negate ^= new_input.Negate;
+					inst->U.I.SrcReg[i].Abs = new_input.Abs;
+				}
+
+				c->Program.InputsRead |= 1 << new_input.Index;
+			}
+		}
+	}
+}
+
+
+/**
+ * Rewrite the program such that everything that writes into the given
+ * output register will instead write to new_output. The new_output
+ * writemask is honoured.
+ */
+void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask)
+{
+	struct rc_instruction * inst;
+
+	c->Program.OutputsWritten &= ~(1 << output);
+
+	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		if (opcode->HasDstReg) {
+			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
+				inst->U.I.DstReg.Index = new_output;
+				inst->U.I.DstReg.WriteMask &= writemask;
+
+				c->Program.OutputsWritten |= 1 << new_output;
+			}
+		}
+	}
+}
+
+
+/**
+ * Rewrite the program such that a given output is duplicated.
+ */
+void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
+{
+	unsigned tempreg = rc_find_free_temporary(c);
+	struct rc_instruction * inst;
+
+	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		if (opcode->HasDstReg) {
+			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
+				inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+				inst->U.I.DstReg.Index = tempreg;
+			}
+		}
+	}
+
+	inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+	inst->U.I.Opcode = RC_OPCODE_MOV;
+	inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+	inst->U.I.DstReg.Index = output;
+
+	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+	inst->U.I.SrcReg[0].Index = tempreg;
+	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+
+	inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+	inst->U.I.Opcode = RC_OPCODE_MOV;
+	inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+	inst->U.I.DstReg.Index = dup_output;
+
+	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+	inst->U.I.SrcReg[0].Index = tempreg;
+	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+
+	c->Program.OutputsWritten |= 1 << dup_output;
+}
+
+
+/**
+ * Introduce standard code fragment to deal with fragment.position.
+ */
+void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
+                                int full_vtransform)
+{
+	unsigned tempregi = rc_find_free_temporary(c);
+	struct rc_instruction * inst_rcp;
+	struct rc_instruction * inst_mul;
+	struct rc_instruction * inst_mad;
+	struct rc_instruction * inst;
+
+	c->Program.InputsRead &= ~(1 << wpos);
+	c->Program.InputsRead |= 1 << new_input;
+
+	/* perspective divide */
+	inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
+	inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
+
+	inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	inst_rcp->U.I.DstReg.Index = tempregi;
+	inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+
+	inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
+	inst_rcp->U.I.SrcReg[0].Index = new_input;
+	inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
+
+	inst_mul = rc_insert_new_instruction(c, inst_rcp);
+	inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+
+	inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	inst_mul->U.I.DstReg.Index = tempregi;
+	inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+
+	inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
+	inst_mul->U.I.SrcReg[0].Index = new_input;
+
+	inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+	inst_mul->U.I.SrcReg[1].Index = tempregi;
+	inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+
+	/* viewport transformation */
+	inst_mad = rc_insert_new_instruction(c, inst_mul);
+	inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+
+	inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	inst_mad->U.I.DstReg.Index = tempregi;
+	inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+
+	inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+	inst_mad->U.I.SrcReg[0].Index = tempregi;
+	inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+
+	inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+	inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
+
+	inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
+	inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;
+
+	if (full_vtransform) {
+		inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
+		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
+	} else {
+		inst_mad->U.I.SrcReg[1].Index =
+		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
+	}
+
+	for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+		unsigned i;
+
+		for(i = 0; i < opcode->NumSrcRegs; i++) {
+			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
+			    inst->U.I.SrcReg[i].Index == wpos) {
+				inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
+				inst->U.I.SrcReg[i].Index = tempregi;
+			}
+		}
+	}
+}
+
+
+/**
+ * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
+ * Gallium and OpenGL define it the other way around.
+ *
+ * So let's just negate FACE at the beginning of the shader and rewrite the rest
+ * of the shader to read from the newly allocated temporary.
+ */
+void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
+{
+	unsigned tempregi = rc_find_free_temporary(c);
+	struct rc_instruction *inst_add;
+	struct rc_instruction *inst;
+
+	/* perspective divide */
+	inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
+	inst_add->U.I.Opcode = RC_OPCODE_ADD;
+
+	inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	inst_add->U.I.DstReg.Index = tempregi;
+	inst_add->U.I.DstReg.WriteMask = RC_MASK_X;
+
+	inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
+	inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
+
+	inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
+	inst_add->U.I.SrcReg[1].Index = face;
+	inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
+	inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
+
+	for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+		unsigned i;
+
+		for(i = 0; i < opcode->NumSrcRegs; i++) {
+			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
+			    inst->U.I.SrcReg[i].Index == face) {
+				inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
+				inst->U.I.SrcReg[i].Index = tempregi;
+			}
+		}
+	}
+}
+
+static void reg_count_callback(void * userdata, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int mask)
+{
+	int *max_reg = userdata;
+	if (file == RC_FILE_TEMPORARY)
+		(int)index > *max_reg ? *max_reg = index : 0;
+}
+
+void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
+{
+	int max_reg = -1;
+	struct rc_instruction * tmp;
+	memset(s, 0, sizeof(*s));
+
+	for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
+							tmp = tmp->Next){
+		const struct rc_opcode_info * info;
+		rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg);
+		if (tmp->Type == RC_INSTRUCTION_NORMAL) {
+			info = rc_get_opcode_info(tmp->U.I.Opcode);
+			if (info->Opcode == RC_OPCODE_BEGIN_TEX)
+				continue;
+			if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE)
+				s->num_presub_ops++;
+		} else {
+			if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
+				s->num_presub_ops++;
+			if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
+				s->num_presub_ops++;
+			/* Assuming alpha will never be a flow control or
+			 * a tex instruction. */
+			if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)
+				s->num_alpha_insts++;
+			if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
+				s->num_rgb_insts++;
+			info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
+		}
+		if (info->IsFlowControl)
+			s->num_fc_insts++;
+		if (info->HasTexture)
+			s->num_tex_insts++;
+		s->num_insts++;
+	}
+	s->num_temp_regs = max_reg + 1;
+}
+
+static void print_stats(struct radeon_compiler * c)
+{
+	struct rc_program_stats s;
+
+	if (c->initial_num_insts <= 5)
+		return;
+
+	rc_get_stats(c, &s);
+
+	switch (c->type) {
+	case RC_VERTEX_PROGRAM:
+		fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n"
+			       "~%4u Instructions\n"
+			       "~%4u Flow Control Instructions\n"
+			       "~%4u Temporary Registers\n"
+			       "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
+			       s.num_insts, s.num_fc_insts, s.num_temp_regs);
+		break;
+
+	case RC_FRAGMENT_PROGRAM:
+		fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n"
+			       "~%4u Instructions\n"
+			       "~%4u Vector Instructions (RGB)\n"
+			       "~%4u Scalar Instructions (Alpha)\n"
+			       "~%4u Flow Control Instructions\n"
+			       "~%4u Texture Instructions\n"
+			       "~%4u Presub Operations\n"
+			       "~%4u Temporary Registers\n"
+			       "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
+			       s.num_insts, s.num_rgb_insts, s.num_alpha_insts,
+			       s.num_fc_insts, s.num_tex_insts, s.num_presub_ops,
+			       s.num_temp_regs);
+		break;
+	default:
+		assert(0);
+	}
+}
+
+static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {
+	"Vertex Program",
+	"Fragment Program"
+};
+
+void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
+{
+	for (unsigned i = 0; list[i].name; i++) {
+		if (list[i].predicate) {
+			list[i].run(c, list[i].user);
+
+			if (c->Error)
+				return;
+
+			if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
+				fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);
+				rc_print_program(&c->Program);
+			}
+		}
+	}
+}
+
+/* Executes a list of compiler passes given in the parameter 'list'. */
+void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
+{
+	struct rc_program_stats s;
+
+	rc_get_stats(c, &s);
+	c->initial_num_insts = s.num_insts;
+
+	if (c->Debug & RC_DBG_LOG) {
+		fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
+		rc_print_program(&c->Program);
+	}
+
+	rc_run_compiler_passes(c, list);
+
+	if (c->Debug & RC_DBG_STATS)
+		print_stats(c);
+}
+
+void rc_validate_final_shader(struct radeon_compiler *c, void *user)
+{
+	/* Check the number of constants. */
+	if (c->Program.Constants.Count > c->max_constants) {
+		rc_error(c, "Too many constants. Max: %i, Got: %i\n",
+			 c->max_constants, c->Program.Constants.Count);
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.h b/src/gallium/drivers/r300/compiler/radeon_compiler.h
new file mode 100644
index 00000000000..74594af23c2
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_compiler.h
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_COMPILER_H
+#define RADEON_COMPILER_H
+
+#include "main/compiler.h"
+
+#include "memory_pool.h"
+#include "radeon_code.h"
+#include "radeon_program.h"
+#include "radeon_emulate_loops.h"
+
+#define RC_DBG_LOG        (1 << 0)
+#define RC_DBG_STATS      (1 << 1)
+
+struct rc_swizzle_caps;
+
+enum rc_program_type {
+	RC_VERTEX_PROGRAM,
+	RC_FRAGMENT_PROGRAM,
+	RC_NUM_PROGRAM_TYPES
+};
+
+struct radeon_compiler {
+	struct memory_pool Pool;
+	struct rc_program Program;
+	enum rc_program_type type;
+	unsigned Debug:2;
+	unsigned Error:1;
+	char * ErrorMsg;
+
+	/* Hardware specification. */
+	unsigned is_r400:1;
+	unsigned is_r500:1;
+	unsigned has_half_swizzles:1;
+	unsigned has_presub:1;
+	unsigned disable_optimizations:1;
+	unsigned max_temp_regs;
+	unsigned max_constants;
+	int max_alu_insts;
+	unsigned max_tex_insts;
+
+	/* Whether to remove unused constants and empty holes in constant space. */
+	unsigned remove_unused_constants:1;
+
+	/**
+	 * Variables used internally, not be touched by callers
+	 * of the compiler
+	 */
+	/*@{*/
+	struct rc_swizzle_caps * SwizzleCaps;
+	/*@}*/
+
+	struct emulate_loop_state loop_state;
+
+	unsigned initial_num_insts; /* Number of instructions at start. */
+};
+
+void rc_init(struct radeon_compiler * c);
+void rc_destroy(struct radeon_compiler * c);
+
+void rc_debug(struct radeon_compiler * c, const char * fmt, ...);
+void rc_error(struct radeon_compiler * c, const char * fmt, ...);
+
+int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion);
+
+/**
+ * This macro acts like an if-statement that can be used to implement
+ * non-aborting assertions in the compiler.
+ *
+ * It checks whether \p cond is true. If not, an internal compiler error is
+ * flagged and the if-clause is run.
+ *
+ * A typical use-case would be:
+ *
+ *  if (rc_assert(c, condition-that-must-be-true))
+ *  	return;
+ */
+#define rc_assert(c, cond) \
+	(!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond))
+
+void rc_calculate_inputs_outputs(struct radeon_compiler * c);
+
+void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input);
+void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask);
+void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output);
+void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
+                                int full_vtransform);
+void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face);
+
+struct r300_fragment_program_compiler {
+	struct radeon_compiler Base;
+	struct rX00_fragment_program_code *code;
+	/* Optional transformations and features. */
+	struct r300_fragment_program_external_state state;
+	unsigned enable_shadow_ambient;
+	/* Register corresponding to the depthbuffer. */
+	unsigned OutputDepth;
+	/* Registers corresponding to the four colorbuffers. */
+	unsigned OutputColor[4];
+
+	void * UserData;
+	void (*AllocateHwInputs)(
+		struct r300_fragment_program_compiler * c,
+		void (*allocate)(void * data, unsigned input, unsigned hwreg),
+		void * mydata);
+};
+
+void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c);
+
+struct r300_vertex_program_compiler {
+	struct radeon_compiler Base;
+	struct r300_vertex_program_code *code;
+	uint32_t RequiredOutputs;
+
+	void * UserData;
+	void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c);
+
+	int PredicateIndex;
+	unsigned int PredicateMask;
+};
+
+void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
+void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user);
+
+struct radeon_compiler_pass {
+	const char *name;	/* Name of the pass. */
+	int dump;		/* Dump the program if Debug == 1? */
+	int predicate;		/* Run this pass? */
+	void (*run)(struct radeon_compiler *c, void *user); /* The main entrypoint. */
+	void *user;		/* Optional parameter which is passed to the run function. */
+};
+
+struct rc_program_stats {
+	unsigned num_insts;
+	unsigned num_fc_insts;
+	unsigned num_tex_insts;
+	unsigned num_rgb_insts;
+	unsigned num_alpha_insts;
+	unsigned num_presub_ops;
+	unsigned num_temp_regs;
+};
+
+void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s);
+
+/* Executes a list of compiler passes given in the parameter 'list'. */
+void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list);
+void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list);
+void rc_validate_final_shader(struct radeon_compiler *c, void *user);
+
+#endif /* RADEON_COMPILER_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler_util.c b/src/gallium/drivers/r300/compiler/radeon_compiler_util.c
new file mode 100644
index 00000000000..2742721f800
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_compiler_util.c
@@ -0,0 +1,701 @@
+/*
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ */
+
+#include "radeon_compiler_util.h"
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+/**
+ */
+unsigned int rc_swizzle_to_writemask(unsigned int swz)
+{
+	unsigned int mask = 0;
+	unsigned int i;
+
+	for(i = 0; i < 4; i++) {
+		mask |= 1 << GET_SWZ(swz, i);
+	}
+	mask &= RC_MASK_XYZW;
+
+	return mask;
+}
+
+rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)
+{
+	if (idx & 0x4)
+		return idx;
+	return GET_SWZ(swz, idx);
+}
+
+/**
+ * The purpose of this function is to standardize the number channels used by
+ * swizzles.  All swizzles regardless of what instruction they are a part of
+ * should have 4 channels initialized with values.
+ * @param channels The number of channels in initial_value that have a
+ * meaningful value.
+ * @return An initialized swizzle that has all of the unused channels set to
+ * RC_SWIZZLE_UNUSED.
+ */
+unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels)
+{
+	unsigned int i;
+	for (i = channels; i < 4; i++) {
+		SET_SWZ(initial_value, i, RC_SWIZZLE_UNUSED);
+	}
+	return initial_value;
+}
+
+unsigned int combine_swizzles4(unsigned int src,
+		rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)
+{
+	unsigned int ret = 0;
+
+	ret |= get_swz(src, swz_x);
+	ret |= get_swz(src, swz_y) << 3;
+	ret |= get_swz(src, swz_z) << 6;
+	ret |= get_swz(src, swz_w) << 9;
+
+	return ret;
+}
+
+unsigned int combine_swizzles(unsigned int src, unsigned int swz)
+{
+	unsigned int ret = 0;
+
+	ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X));
+	ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3;
+	ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6;
+	ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9;
+
+	return ret;
+}
+
+/**
+ * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W
+ */
+rc_swizzle rc_mask_to_swizzle(unsigned int mask)
+{
+	switch (mask) {
+	case RC_MASK_X: return RC_SWIZZLE_X;
+	case RC_MASK_Y: return RC_SWIZZLE_Y;
+	case RC_MASK_Z: return RC_SWIZZLE_Z;
+	case RC_MASK_W: return RC_SWIZZLE_W;
+	}
+	return RC_SWIZZLE_UNUSED;
+}
+
+/* Reorder mask bits according to swizzle. */
+unsigned swizzle_mask(unsigned swizzle, unsigned mask)
+{
+	unsigned ret = 0;
+	for (unsigned chan = 0; chan < 4; ++chan) {
+		unsigned swz = GET_SWZ(swizzle, chan);
+		if (swz < 4)
+			ret |= GET_BIT(mask, swz) << chan;
+	}
+	return ret;
+}
+
+static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info)
+{
+	if (info->HasTexture) {
+		return 0;
+	}
+	switch (info->Opcode) {
+		case RC_OPCODE_DP2:
+		case RC_OPCODE_DP3:
+		case RC_OPCODE_DP4:
+		case RC_OPCODE_DDX:
+		case RC_OPCODE_DDY:
+			return 0;
+		default:
+			return 1;
+	}
+}
+
+/**
+ * @return A swizzle the results from converting old_swizzle using
+ * conversion_swizzle
+ */
+unsigned int rc_adjust_channels(
+	unsigned int old_swizzle,
+	unsigned int conversion_swizzle)
+{
+	unsigned int i;
+	unsigned int new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
+	for (i = 0; i < 4; i++) {
+		unsigned int new_chan = get_swz(conversion_swizzle, i);
+		if (new_chan == RC_SWIZZLE_UNUSED) {
+			continue;
+		}
+		SET_SWZ(new_swizzle, new_chan, GET_SWZ(old_swizzle, i));
+	}
+	return new_swizzle;
+}
+
+static unsigned int rewrite_writemask(
+	unsigned int old_mask,
+	unsigned int conversion_swizzle)
+{
+	unsigned int new_mask = 0;
+	unsigned int i;
+
+	for (i = 0; i < 4; i++) {
+		if (!GET_BIT(old_mask, i)
+		   || GET_SWZ(conversion_swizzle, i) == RC_SWIZZLE_UNUSED) {
+			continue;
+		}
+		new_mask |= (1 << GET_SWZ(conversion_swizzle, i));
+	}
+
+	return new_mask;
+}
+
+/**
+ * This function rewrites the writemask of sub and adjusts the swizzles
+ * of all its source registers based on the conversion_swizzle.
+ * conversion_swizzle represents a mapping of the old writemask to the
+ * new writemask.  For a detailed description of how conversion swizzles
+ * work see rc_rewrite_swizzle().
+ */
+void rc_pair_rewrite_writemask(
+	struct rc_pair_sub_instruction * sub,
+	unsigned int conversion_swizzle)
+{
+	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+	unsigned int i;
+
+	sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle);
+
+	if (!srcs_need_rewrite(info)) {
+		return ;
+	}
+
+	for (i = 0; i < info->NumSrcRegs; i++) {
+		sub->Arg[i].Swizzle =
+			rc_adjust_channels(sub->Arg[i].Swizzle,
+						conversion_swizzle);
+	}
+}
+
+static void normal_rewrite_writemask_cb(
+	void * userdata,
+	struct rc_instruction * inst,
+	struct rc_src_register * src)
+{
+	unsigned int * new_mask = (unsigned int *)userdata;
+	src->Swizzle = rc_adjust_channels(src->Swizzle, *new_mask);
+}
+
+/**
+ * This function is the same as rc_pair_rewrite_writemask() except it
+ * operates on normal instructions.
+ */
+void rc_normal_rewrite_writemask(
+	struct rc_instruction * inst,
+	unsigned int conversion_swizzle)
+{
+	unsigned int new_mask;
+	struct rc_sub_instruction * sub = &inst->U.I;
+	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+	sub->DstReg.WriteMask =
+		rewrite_writemask(sub->DstReg.WriteMask, conversion_swizzle);
+
+	if (info->HasTexture) {
+		unsigned int i;
+		assert(sub->TexSwizzle == RC_SWIZZLE_XYZW);
+		for (i = 0; i < 4; i++) {
+			unsigned int swz = GET_SWZ(conversion_swizzle, i);
+			if (swz > 3)
+				continue;
+			SET_SWZ(sub->TexSwizzle, swz, i);
+		}
+	}
+
+	if (!srcs_need_rewrite(info)) {
+		return;
+	}
+
+	new_mask = sub->DstReg.WriteMask;
+	rc_for_all_reads_src(inst, normal_rewrite_writemask_cb, &new_mask);
+}
+
+/**
+ * This function replaces each value 'swz' in swizzle with the value of
+ * GET_SWZ(conversion_swizzle, swz).  So, if you want to change all the X's
+ * in swizzle to Y, then conversion_swizzle should be Y___ (0xff9).  If you want
+ * to change all the Y's in swizzle to X, then conversion_swizzle should be
+ * _X__ (0xfc7).  If you want to change the Y's to X and the X's to Y, then
+ * conversion swizzle should be YX__ (0xfc1).
+ * @param swizzle The swizzle to change
+ * @param conversion_swizzle Describes the conversion to perform on the swizzle
+ * @return A converted swizzle
+ */
+unsigned int rc_rewrite_swizzle(
+	unsigned int swizzle,
+	unsigned int conversion_swizzle)
+{
+	unsigned int chan;
+	unsigned int out_swizzle = swizzle;
+
+	for (chan = 0; chan < 4; chan++) {
+		unsigned int swz = GET_SWZ(swizzle, chan);
+		unsigned int new_swz;
+		if (swz > 3) {
+			SET_SWZ(out_swizzle, chan, swz);
+		} else {
+			new_swz = GET_SWZ(conversion_swizzle, swz);
+			if (new_swz != RC_SWIZZLE_UNUSED) {
+				SET_SWZ(out_swizzle, chan, new_swz);
+			} else {
+				SET_SWZ(out_swizzle, chan, swz);
+			}
+		}
+	}
+	return out_swizzle;
+}
+
+/**
+ * Left multiplication of a register with a swizzle
+ */
+struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg)
+{
+	struct rc_src_register tmp = srcreg;
+	int i;
+	tmp.Swizzle = 0;
+	tmp.Negate = 0;
+	for(i = 0; i < 4; ++i) {
+		rc_swizzle swz = GET_SWZ(swizzle, i);
+		if (swz < 4) {
+			tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
+			tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
+		} else {
+			tmp.Swizzle |= swz << (i*3);
+		}
+	}
+	return tmp;
+}
+
+void reset_srcreg(struct rc_src_register* reg)
+{
+	memset(reg, 0, sizeof(struct rc_src_register));
+	reg->Swizzle = RC_SWIZZLE_XYZW;
+}
+
+unsigned int rc_src_reads_dst_mask(
+		rc_register_file src_file,
+		unsigned int src_idx,
+		unsigned int src_swz,
+		rc_register_file dst_file,
+		unsigned int dst_idx,
+		unsigned int dst_mask)
+{
+	if (src_file != dst_file || src_idx != dst_idx) {
+		return RC_MASK_NONE;
+	}
+	return dst_mask & rc_swizzle_to_writemask(src_swz);
+}
+
+/**
+ * @return A bit mask specifying whether this swizzle will select from an RGB
+ * source, an Alpha source, or both.
+ */
+unsigned int rc_source_type_swz(unsigned int swizzle)
+{
+	unsigned int chan;
+	unsigned int swz = RC_SWIZZLE_UNUSED;
+	unsigned int ret = RC_SOURCE_NONE;
+
+	for(chan = 0; chan < 4; chan++) {
+		swz = GET_SWZ(swizzle, chan);
+		if (swz == RC_SWIZZLE_W) {
+			ret |= RC_SOURCE_ALPHA;
+		} else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
+						|| swz == RC_SWIZZLE_Z) {
+			ret |= RC_SOURCE_RGB;
+		}
+	}
+	return ret;
+}
+
+unsigned int rc_source_type_mask(unsigned int mask)
+{
+	unsigned int ret = RC_SOURCE_NONE;
+
+	if (mask & RC_MASK_XYZ)
+		ret |= RC_SOURCE_RGB;
+
+	if (mask & RC_MASK_W)
+		ret |= RC_SOURCE_ALPHA;
+
+	return ret;
+}
+
+struct src_select {
+	rc_register_file File;
+	int Index;
+	unsigned int SrcType;
+};
+
+struct can_use_presub_data {
+	struct src_select Selects[5];
+	unsigned int SelectCount;
+	const struct rc_src_register * ReplaceReg;
+	unsigned int ReplaceRemoved;
+};
+
+static void can_use_presub_data_add_select(
+	struct can_use_presub_data * data,
+	rc_register_file file,
+	unsigned int index,
+	unsigned int src_type)
+{
+	struct src_select * select;
+
+	select = &data->Selects[data->SelectCount++];
+	select->File = file;
+	select->Index = index;
+	select->SrcType = src_type;
+}
+
+/**
+ * This callback function counts the number of sources in inst that are
+ * different from the sources in can_use_presub_data->RemoveSrcs.
+ */
+static void can_use_presub_read_cb(
+	void * userdata,
+	struct rc_instruction * inst,
+	struct rc_src_register * src)
+{
+	struct can_use_presub_data * d = userdata;
+
+	if (!d->ReplaceRemoved && src == d->ReplaceReg) {
+		d->ReplaceRemoved = 1;
+		return;
+	}
+
+	if (src->File == RC_FILE_NONE)
+		return;
+
+	can_use_presub_data_add_select(d, src->File, src->Index,
+					rc_source_type_swz(src->Swizzle));
+}
+
+unsigned int rc_inst_can_use_presub(
+	struct rc_instruction * inst,
+	rc_presubtract_op presub_op,
+	unsigned int presub_writemask,
+	const struct rc_src_register * replace_reg,
+	const struct rc_src_register * presub_src0,
+	const struct rc_src_register * presub_src1)
+{
+	struct can_use_presub_data d;
+	unsigned int num_presub_srcs;
+	unsigned int i;
+	const struct rc_opcode_info * info =
+					rc_get_opcode_info(inst->U.I.Opcode);
+	int rgb_count = 0, alpha_count = 0;
+	unsigned int src_type0, src_type1;
+
+	if (presub_op == RC_PRESUB_NONE) {
+		return 1;
+	}
+
+	if (info->HasTexture) {
+		return 0;
+	}
+
+	/* We can't use more than one presubtract value in an
+	 * instruction, unless the two prsubtract operations
+	 * are the same and read from the same registers.
+	 * XXX For now we will limit instructions to only one presubtract
+	 * value.*/
+	if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
+		return 0;
+	}
+
+	memset(&d, 0, sizeof(d));
+	d.ReplaceReg = replace_reg;
+
+	rc_for_all_reads_src(inst, can_use_presub_read_cb, &d);
+
+	num_presub_srcs = rc_presubtract_src_reg_count(presub_op);
+
+	src_type0 = rc_source_type_swz(presub_src0->Swizzle);
+	can_use_presub_data_add_select(&d,
+		presub_src0->File,
+		presub_src0->Index,
+		src_type0);
+
+	if (num_presub_srcs > 1) {
+		src_type1 = rc_source_type_swz(presub_src1->Swizzle);
+		can_use_presub_data_add_select(&d,
+			presub_src1->File,
+			presub_src1->Index,
+			src_type1);
+
+		/* Even if both of the presub sources read from the same
+		 * register, we still need to use 2 different source selects
+		 * for them, so we need to increment the count to compensate.
+		 */
+		if (presub_src0->File == presub_src1->File
+		    && presub_src0->Index == presub_src1->Index) {
+			if (src_type0 & src_type1 & RC_SOURCE_RGB) {
+				rgb_count++;
+			}
+			if (src_type0 & src_type1 & RC_SOURCE_ALPHA) {
+				alpha_count++;
+			}
+		}
+	}
+
+	/* Count the number of source selects for Alpha and RGB.  If we
+	 * encounter two of the same source selects then we can ignore the
+	 * first one. */
+	for (i = 0; i < d.SelectCount; i++) {
+		unsigned int j;
+		unsigned int src_type = d.Selects[i].SrcType;
+		for (j = i + 1; j < d.SelectCount; j++) {
+			if (d.Selects[i].File == d.Selects[j].File
+			    && d.Selects[i].Index == d.Selects[j].Index) {
+				src_type &= ~d.Selects[j].SrcType;
+			}
+		}
+		if (src_type & RC_SOURCE_RGB) {
+			rgb_count++;
+		}
+
+		if (src_type & RC_SOURCE_ALPHA) {
+			alpha_count++;
+		}
+	}
+
+	if (rgb_count > 3 || alpha_count > 3) {
+		return 0;
+	}
+
+	return 1;
+}
+
+struct max_data {
+	unsigned int Max;
+	unsigned int HasFileType;
+	rc_register_file File;
+};
+
+static void max_callback(
+	void * userdata,
+	struct rc_instruction * inst,
+	rc_register_file file,
+	unsigned int index,
+	unsigned int mask)
+{
+	struct max_data * d = (struct max_data*)userdata;
+	if (file == d->File && (!d->HasFileType || index > d->Max)) {
+		d->Max = index;
+		d->HasFileType = 1;
+	}
+}
+
+/**
+ * @return The maximum index of the specified register file used by the
+ * program.
+ */
+int rc_get_max_index(
+	struct radeon_compiler * c,
+	rc_register_file file)
+{
+	struct max_data data;
+	struct rc_instruction * inst;
+	data.Max = 0;
+	data.HasFileType = 0;
+	data.File = file;
+	for (inst = c->Program.Instructions.Next;
+					inst != &c->Program.Instructions;
+					inst = inst->Next) {
+		rc_for_all_reads_mask(inst, max_callback, &data);
+		rc_for_all_writes_mask(inst, max_callback, &data);
+	}
+	if (!data.HasFileType) {
+		return -1;
+	} else {
+		return data.Max;
+	}
+}
+
+static unsigned int get_source_readmask(
+	struct rc_pair_sub_instruction * sub,
+	unsigned int source,
+	unsigned int src_type)
+{
+	unsigned int i;
+	unsigned int readmask = 0;
+	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+
+	for (i = 0; i < info->NumSrcRegs; i++) {
+		if (sub->Arg[i].Source != source
+		    || src_type != rc_source_type_swz(sub->Arg[i].Swizzle)) {
+			continue;
+		}
+		readmask |= rc_swizzle_to_writemask(sub->Arg[i].Swizzle);
+	}
+	return readmask;
+}
+
+/**
+ * This function attempts to remove a source from a pair instructions.
+ * @param inst
+ * @param src_type RC_SOURCE_RGB, RC_SOURCE_ALPHA, or both bitwise or'd
+ * @param source The index of the source to remove
+ * @param new_readmask A mask representing the components that are read by
+ * the source that is intended to replace the one you are removing.  If you
+ * want to remove a source only and not replace it, this parameter should be
+ * zero.
+ * @return 1 if the source was successfully removed, 0 if it was not
+ */
+unsigned int rc_pair_remove_src(
+	struct rc_instruction * inst,
+	unsigned int src_type,
+	unsigned int source,
+	unsigned int new_readmask)
+{
+	unsigned int readmask = 0;
+
+	readmask |= get_source_readmask(&inst->U.P.RGB, source, src_type);
+	readmask |= get_source_readmask(&inst->U.P.Alpha, source, src_type);
+
+	if ((new_readmask & readmask) != readmask)
+		return 0;
+
+	if (src_type & RC_SOURCE_RGB) {
+		memset(&inst->U.P.RGB.Src[source], 0,
+			sizeof(struct rc_pair_instruction_source));
+	}
+
+	if (src_type & RC_SOURCE_ALPHA) {
+		memset(&inst->U.P.Alpha.Src[source], 0,
+			sizeof(struct rc_pair_instruction_source));
+	}
+
+	return 1;
+}
+
+/**
+ * @return RC_OPCODE_NOOP if inst is not a flow control instruction.
+ * @return The opcode of inst if it is a flow control instruction.
+ */
+rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst)
+{
+	const struct rc_opcode_info * info;
+	if (inst->Type == RC_INSTRUCTION_NORMAL) {
+		info = rc_get_opcode_info(inst->U.I.Opcode);
+	} else {
+		info = rc_get_opcode_info(inst->U.P.RGB.Opcode);
+		/*A flow control instruction shouldn't have an alpha
+		 * instruction.*/
+		assert(!info->IsFlowControl ||
+				inst->U.P.Alpha.Opcode == RC_OPCODE_NOP);
+	}
+
+	if (info->IsFlowControl)
+		return info->Opcode;
+	else
+		return RC_OPCODE_NOP;
+
+}
+
+/**
+ * @return The BGNLOOP instruction that starts the loop ended by endloop.
+ */
+struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop)
+{
+	unsigned int endloop_count = 0;
+	struct rc_instruction * inst;
+	for (inst = endloop->Prev; inst != endloop; inst = inst->Prev) {
+		rc_opcode op = rc_get_flow_control_inst(inst);
+		if (op == RC_OPCODE_ENDLOOP) {
+			endloop_count++;
+		} else if (op == RC_OPCODE_BGNLOOP) {
+			if (endloop_count == 0) {
+				return inst;
+			} else {
+				endloop_count--;
+			}
+		}
+	}
+	return NULL;
+}
+
+/**
+ * @return The ENDLOOP instruction that ends the loop started by bgnloop.
+ */
+struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop)
+{
+	unsigned int bgnloop_count = 0;
+	struct rc_instruction * inst;
+	for (inst = bgnloop->Next; inst!=bgnloop; inst = inst->Next) {
+		rc_opcode op = rc_get_flow_control_inst(inst);
+		if (op == RC_OPCODE_BGNLOOP) {
+			bgnloop_count++;
+		} else if (op == RC_OPCODE_ENDLOOP) {
+			if (bgnloop_count == 0) {
+				return inst;
+			} else {
+				bgnloop_count--;
+			}
+		}
+	}
+	return NULL;
+}
+
+/**
+ * @return A conversion swizzle for converting from old_mask->new_mask
+ */
+unsigned int rc_make_conversion_swizzle(
+	unsigned int old_mask,
+	unsigned int new_mask)
+{
+	unsigned int conversion_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
+	unsigned int old_idx;
+	unsigned int new_idx = 0;
+	for (old_idx = 0; old_idx < 4; old_idx++) {
+		if (!GET_BIT(old_mask, old_idx))
+			continue;
+		for ( ; new_idx < 4; new_idx++) {
+			if (GET_BIT(new_mask, new_idx)) {
+				SET_SWZ(conversion_swizzle, old_idx, new_idx);
+				new_idx++;
+				break;
+			}
+		}
+	}
+	return conversion_swizzle;
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler_util.h b/src/gallium/drivers/r300/compiler/radeon_compiler_util.h
new file mode 100644
index 00000000000..3730aa888c0
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_compiler_util.h
@@ -0,0 +1,89 @@
+#include "radeon_program_constants.h"
+
+#ifndef RADEON_PROGRAM_UTIL_H
+#define RADEON_PROGRAM_UTIL_H
+
+#include "radeon_opcodes.h"
+
+struct radeon_compiler;
+struct rc_instruction;
+struct rc_pair_instruction;
+struct rc_pair_sub_instruction;
+struct rc_src_register;
+
+unsigned int rc_swizzle_to_writemask(unsigned int swz);
+
+rc_swizzle get_swz(unsigned int swz, rc_swizzle idx);
+
+unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels);
+
+unsigned int combine_swizzles4(unsigned int src,
+			       rc_swizzle swz_x, rc_swizzle swz_y,
+			       rc_swizzle swz_z, rc_swizzle swz_w);
+
+unsigned int combine_swizzles(unsigned int src, unsigned int swz);
+
+rc_swizzle rc_mask_to_swizzle(unsigned int mask);
+
+unsigned swizzle_mask(unsigned swizzle, unsigned mask);
+
+unsigned int rc_adjust_channels(
+	unsigned int old_swizzle,
+	unsigned int conversion_swizzle);
+
+void rc_pair_rewrite_writemask(
+	struct rc_pair_sub_instruction * sub,
+	unsigned int conversion_swizzle);
+
+void rc_normal_rewrite_writemask(
+	struct rc_instruction * inst,
+	unsigned int conversion_swizzle);
+
+unsigned int rc_rewrite_swizzle(
+	unsigned int swizzle,
+	unsigned int new_mask);
+
+struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg);
+
+void reset_srcreg(struct rc_src_register* reg);
+
+unsigned int rc_src_reads_dst_mask(
+		rc_register_file src_file,
+		unsigned int src_idx,
+		unsigned int src_swz,
+		rc_register_file dst_file,
+		unsigned int dst_idx,
+		unsigned int dst_mask);
+
+unsigned int rc_source_type_swz(unsigned int swizzle);
+
+unsigned int rc_source_type_mask(unsigned int mask);
+
+unsigned int rc_inst_can_use_presub(
+	struct rc_instruction * inst,
+	rc_presubtract_op presub_op,
+	unsigned int presub_writemask,
+	const struct rc_src_register * replace_reg,
+	const struct rc_src_register * presub_src0,
+	const struct rc_src_register * presub_src1);
+
+int rc_get_max_index(
+	struct radeon_compiler * c,
+	rc_register_file file);
+
+unsigned int rc_pair_remove_src(
+	struct rc_instruction * inst,
+	unsigned int src_type,
+	unsigned int source,
+	unsigned int new_readmask);
+
+rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst);
+
+struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop);
+struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop);
+
+unsigned int rc_make_conversion_swizzle(
+	unsigned int old_mask,
+	unsigned int new_mask);
+
+#endif /* RADEON_PROGRAM_UTIL_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.c b/src/gallium/drivers/r300/compiler/radeon_dataflow.c
new file mode 100644
index 00000000000..a8decacedaf
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_dataflow.c
@@ -0,0 +1,892 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_program.h"
+
+struct read_write_mask_data {
+	void * UserData;
+	rc_read_write_mask_fn Cb;
+};
+
+static void reads_normal_callback(
+	void * userdata,
+	struct rc_instruction * fullinst,
+	struct rc_src_register * src)
+{
+	struct read_write_mask_data * cb_data = userdata;
+	unsigned int refmask = 0;
+	unsigned int chan;
+	for(chan = 0; chan < 4; chan++) {
+		refmask |= 1 << GET_SWZ(src->Swizzle, chan);
+	}
+	refmask &= RC_MASK_XYZW;
+
+	if (refmask) {
+		cb_data->Cb(cb_data->UserData, fullinst, src->File,
+							src->Index, refmask);
+	}
+
+	if (refmask && src->RelAddr) {
+		cb_data->Cb(cb_data->UserData, fullinst, RC_FILE_ADDRESS, 0,
+								RC_MASK_X);
+	}
+}
+
+static void pair_get_src_refmasks(unsigned int * refmasks,
+					struct rc_pair_instruction * inst,
+					unsigned int swz, unsigned int src)
+{
+	if (swz >= 4)
+		return;
+
+	if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y || swz == RC_SWIZZLE_Z) {
+		if(src == RC_PAIR_PRESUB_SRC) {
+			unsigned int i;
+			int srcp_regs =
+				rc_presubtract_src_reg_count(
+				inst->RGB.Src[src].Index);
+			for(i = 0; i < srcp_regs; i++) {
+				refmasks[i] |= 1 << swz;
+			}
+		}
+		else {
+			refmasks[src] |= 1 << swz;
+		}
+	}
+
+	if (swz == RC_SWIZZLE_W) {
+		if (src == RC_PAIR_PRESUB_SRC) {
+			unsigned int i;
+			int srcp_regs = rc_presubtract_src_reg_count(
+					inst->Alpha.Src[src].Index);
+			for(i = 0; i < srcp_regs; i++) {
+				refmasks[i] |= 1 << swz;
+			}
+		}
+		else {
+			refmasks[src] |= 1 << swz;
+		}
+	}
+}
+
+static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
+{
+	struct rc_pair_instruction * inst = &fullinst->U.P;
+	unsigned int refmasks[3] = { 0, 0, 0 };
+
+	unsigned int arg;
+
+	for(arg = 0; arg < 3; ++arg) {
+		unsigned int chan;
+		for(chan = 0; chan < 3; ++chan) {
+			unsigned int swz_rgb =
+				GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan);
+			unsigned int swz_alpha =
+				GET_SWZ(inst->Alpha.Arg[arg].Swizzle, chan);
+			pair_get_src_refmasks(refmasks, inst, swz_rgb,
+						inst->RGB.Arg[arg].Source);
+			pair_get_src_refmasks(refmasks, inst, swz_alpha,
+						inst->Alpha.Arg[arg].Source);
+		}
+	}
+
+	for(unsigned int src = 0; src < 3; ++src) {
+		if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ))
+			cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index,
+			   refmasks[src] & RC_MASK_XYZ);
+
+		if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W))
+			cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W);
+	}
+}
+
+static void pair_sub_for_all_args(
+	struct rc_instruction * fullinst,
+	struct rc_pair_sub_instruction * sub,
+	rc_pair_read_arg_fn cb,
+	void * userdata)
+{
+	int i;
+	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+
+	for(i = 0; i < info->NumSrcRegs; i++) {
+		unsigned int src_type;
+
+		src_type = rc_source_type_swz(sub->Arg[i].Swizzle);
+
+		if (src_type == RC_SOURCE_NONE)
+			continue;
+
+		if (sub->Arg[i].Source == RC_PAIR_PRESUB_SRC) {
+			unsigned int presub_type;
+			unsigned int presub_src_count;
+			struct rc_pair_instruction_source * src_array;
+			unsigned int j;
+
+			if (src_type & RC_SOURCE_RGB) {
+				presub_type = fullinst->
+					U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index;
+				src_array = fullinst->U.P.RGB.Src;
+			} else {
+				presub_type = fullinst->
+					U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index;
+				src_array = fullinst->U.P.Alpha.Src;
+			}
+			presub_src_count
+				= rc_presubtract_src_reg_count(presub_type);
+			for(j = 0; j < presub_src_count; j++) {
+				cb(userdata, fullinst, &sub->Arg[i],
+								&src_array[j]);
+			}
+		} else {
+			struct rc_pair_instruction_source * src =
+				rc_pair_get_src(&fullinst->U.P, &sub->Arg[i]);
+			if (src) {
+				cb(userdata, fullinst, &sub->Arg[i], src);
+			}
+		}
+	}
+}
+
+/* This function calls the callback function (cb) for each source used by
+ * the instruction.
+ * */
+void rc_for_all_reads_src(
+	struct rc_instruction * inst,
+	rc_read_src_fn cb,
+	void * userdata)
+{
+	const struct rc_opcode_info * opcode =
+					rc_get_opcode_info(inst->U.I.Opcode);
+
+	/* This function only works with normal instructions. */
+	if (inst->Type != RC_INSTRUCTION_NORMAL) {
+		assert(0);
+		return;
+	}
+
+	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+
+		if (inst->U.I.SrcReg[src].File == RC_FILE_NONE)
+			continue;
+
+		if (inst->U.I.SrcReg[src].File == RC_FILE_PRESUB) {
+			unsigned int i;
+			unsigned int srcp_regs = rc_presubtract_src_reg_count(
+						inst->U.I.PreSub.Opcode);
+			for( i = 0; i < srcp_regs; i++) {
+				cb(userdata, inst, &inst->U.I.PreSub.SrcReg[i]);
+			}
+		} else {
+			cb(userdata, inst, &inst->U.I.SrcReg[src]);
+		}
+	}
+}
+
+/**
+ * This function calls the callback function (cb) for each arg of the RGB and
+ * alpha components.
+ */
+void rc_pair_for_all_reads_arg(
+	struct rc_instruction * inst,
+	rc_pair_read_arg_fn cb,
+	void * userdata)
+{
+	/* This function only works with pair instructions. */
+	if (inst->Type != RC_INSTRUCTION_PAIR) {
+		assert(0);
+		return;
+	}
+
+	pair_sub_for_all_args(inst, &inst->U.P.RGB, cb, userdata);
+	pair_sub_for_all_args(inst, &inst->U.P.Alpha, cb, userdata);
+}
+
+/**
+ * Calls a callback function for all register reads.
+ *
+ * This is conservative, i.e. if the same register is referenced multiple times,
+ * the callback may also be called multiple times.
+ * Also, the writemask of the instruction is not taken into account.
+ */
+void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
+{
+	if (inst->Type == RC_INSTRUCTION_NORMAL) {
+		struct read_write_mask_data cb_data;
+		cb_data.UserData = userdata;
+		cb_data.Cb = cb;
+
+		rc_for_all_reads_src(inst, reads_normal_callback, &cb_data);
+	} else {
+		reads_pair(inst, cb, userdata);
+	}
+}
+
+
+
+static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
+{
+	struct rc_sub_instruction * inst = &fullinst->U.I;
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+
+	if (opcode->HasDstReg && inst->DstReg.WriteMask)
+		cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, inst->DstReg.WriteMask);
+
+	if (inst->WriteALUResult)
+		cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);
+}
+
+static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
+{
+	struct rc_pair_instruction * inst = &fullinst->U.P;
+
+	if (inst->RGB.WriteMask)
+		cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, inst->RGB.WriteMask);
+
+	if (inst->Alpha.WriteMask)
+		cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, RC_MASK_W);
+
+	if (inst->WriteALUResult)
+		cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);
+}
+
+/**
+ * Calls a callback function for all register writes in the instruction,
+ * reporting writemasks to the callback function.
+ *
+ * \warning Does not report output registers for paired instructions!
+ */
+void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
+{
+	if (inst->Type == RC_INSTRUCTION_NORMAL) {
+		writes_normal(inst, cb, userdata);
+	} else {
+		writes_pair(inst, cb, userdata);
+	}
+}
+
+
+struct mask_to_chan_data {
+	void * UserData;
+	rc_read_write_chan_fn Fn;
+};
+
+static void mask_to_chan_cb(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int mask)
+{
+	struct mask_to_chan_data * d = data;
+	for(unsigned int chan = 0; chan < 4; ++chan) {
+		if (GET_BIT(mask, chan))
+			d->Fn(d->UserData, inst, file, index, chan);
+	}
+}
+
+/**
+ * Calls a callback function for all sourced register channels.
+ *
+ * This is conservative, i.e. channels may be called multiple times,
+ * and the writemask of the instruction is not taken into account.
+ */
+void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
+{
+	struct mask_to_chan_data d;
+	d.UserData = userdata;
+	d.Fn = cb;
+	rc_for_all_reads_mask(inst, &mask_to_chan_cb, &d);
+}
+
+/**
+ * Calls a callback function for all written register channels.
+ *
+ * \warning Does not report output registers for paired instructions!
+ */
+void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
+{
+	struct mask_to_chan_data d;
+	d.UserData = userdata;
+	d.Fn = cb;
+	rc_for_all_writes_mask(inst, &mask_to_chan_cb, &d);
+}
+
+static void remap_normal_instruction(struct rc_instruction * fullinst,
+		rc_remap_register_fn cb, void * userdata)
+{
+	struct rc_sub_instruction * inst = &fullinst->U.I;
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+	unsigned int remapped_presub = 0;
+
+	if (opcode->HasDstReg) {
+		rc_register_file file = inst->DstReg.File;
+		unsigned int index = inst->DstReg.Index;
+
+		cb(userdata, fullinst, &file, &index);
+
+		inst->DstReg.File = file;
+		inst->DstReg.Index = index;
+	}
+
+	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+		rc_register_file file = inst->SrcReg[src].File;
+		unsigned int index = inst->SrcReg[src].Index;
+
+		if (file == RC_FILE_PRESUB) {
+			unsigned int i;
+			unsigned int srcp_srcs = rc_presubtract_src_reg_count(
+						inst->PreSub.Opcode);
+			/* Make sure we only remap presubtract sources once in
+			 * case more than one source register reads the
+			 * presubtract result. */
+			if (remapped_presub)
+				continue;
+
+			for(i = 0; i < srcp_srcs; i++) {
+				file = inst->PreSub.SrcReg[i].File;
+				index = inst->PreSub.SrcReg[i].Index;
+				cb(userdata, fullinst, &file, &index);
+				inst->PreSub.SrcReg[i].File = file;
+				inst->PreSub.SrcReg[i].Index = index;
+			}
+			remapped_presub = 1;
+		}
+		else {
+			cb(userdata, fullinst, &file, &index);
+
+			inst->SrcReg[src].File = file;
+			inst->SrcReg[src].Index = index;
+		}
+	}
+}
+
+static void remap_pair_instruction(struct rc_instruction * fullinst,
+		rc_remap_register_fn cb, void * userdata)
+{
+	struct rc_pair_instruction * inst = &fullinst->U.P;
+
+	if (inst->RGB.WriteMask) {
+		rc_register_file file = RC_FILE_TEMPORARY;
+		unsigned int index = inst->RGB.DestIndex;
+
+		cb(userdata, fullinst, &file, &index);
+
+		inst->RGB.DestIndex = index;
+	}
+
+	if (inst->Alpha.WriteMask) {
+		rc_register_file file = RC_FILE_TEMPORARY;
+		unsigned int index = inst->Alpha.DestIndex;
+
+		cb(userdata, fullinst, &file, &index);
+
+		inst->Alpha.DestIndex = index;
+	}
+
+	for(unsigned int src = 0; src < 3; ++src) {
+		if (inst->RGB.Src[src].Used) {
+			rc_register_file file = inst->RGB.Src[src].File;
+			unsigned int index = inst->RGB.Src[src].Index;
+
+			cb(userdata, fullinst, &file, &index);
+
+			inst->RGB.Src[src].File = file;
+			inst->RGB.Src[src].Index = index;
+		}
+
+		if (inst->Alpha.Src[src].Used) {
+			rc_register_file file = inst->Alpha.Src[src].File;
+			unsigned int index = inst->Alpha.Src[src].Index;
+
+			cb(userdata, fullinst, &file, &index);
+
+			inst->Alpha.Src[src].File = file;
+			inst->Alpha.Src[src].Index = index;
+		}
+	}
+}
+
+
+/**
+ * Remap all register accesses according to the given function.
+ * That is, call the function \p cb for each referenced register (both read and written)
+ * and update the given instruction \p inst accordingly
+ * if it modifies its \ref pfile and \ref pindex contents.
+ */
+void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata)
+{
+	if (inst->Type == RC_INSTRUCTION_NORMAL)
+		remap_normal_instruction(inst, cb, userdata);
+	else
+		remap_pair_instruction(inst, cb, userdata);
+}
+
+struct branch_write_mask {
+	unsigned int IfWriteMask:4;
+	unsigned int ElseWriteMask:4;
+	unsigned int HasElse:1;
+};
+
+union get_readers_read_cb {
+	rc_read_src_fn I;
+	rc_pair_read_arg_fn P;
+};
+
+struct get_readers_callback_data {
+	struct radeon_compiler * C;
+	struct rc_reader_data * ReaderData;
+	rc_read_src_fn ReadNormalCB;
+	rc_pair_read_arg_fn ReadPairCB;
+	rc_read_write_mask_fn WriteCB;
+	rc_register_file DstFile;
+	unsigned int DstIndex;
+	unsigned int DstMask;
+	unsigned int AliveWriteMask;
+	/*  For convenience, this is indexed starting at 1 */
+	struct branch_write_mask BranchMasks[R500_PFS_MAX_BRANCH_DEPTH_FULL + 1];
+};
+
+static struct rc_reader * add_reader(
+	struct memory_pool * pool,
+	struct rc_reader_data * data,
+	struct rc_instruction * inst,
+	unsigned int mask)
+{
+	struct rc_reader * new;
+	memory_pool_array_reserve(pool, struct rc_reader, data->Readers,
+				data->ReaderCount, data->ReadersReserved, 1);
+	new = &data->Readers[data->ReaderCount++];
+	new->Inst = inst;
+	new->WriteMask = mask;
+	return new;
+}
+
+static void add_reader_normal(
+	struct memory_pool * pool,
+	struct rc_reader_data * data,
+	struct rc_instruction * inst,
+	unsigned int mask,
+	struct rc_src_register * src)
+{
+	struct rc_reader * new = add_reader(pool, data, inst, mask);
+	new->U.I.Src = src;
+}
+
+
+static void add_reader_pair(
+	struct memory_pool * pool,
+	struct rc_reader_data * data,
+	struct rc_instruction * inst,
+	unsigned int mask,
+	struct rc_pair_instruction_arg * arg,
+	struct rc_pair_instruction_source * src)
+{
+	struct rc_reader * new = add_reader(pool, data, inst, mask);
+	new->U.P.Src = src;
+	new->U.P.Arg = arg;
+}
+
+static unsigned int get_readers_read_callback(
+	struct get_readers_callback_data * cb_data,
+	unsigned int has_rel_addr,
+	rc_register_file file,
+	unsigned int index,
+	unsigned int swizzle)
+{
+	unsigned int shared_mask, read_mask;
+
+	if (has_rel_addr) {
+		cb_data->ReaderData->Abort = 1;
+		return RC_MASK_NONE;
+	}
+
+	shared_mask = rc_src_reads_dst_mask(file, index, swizzle,
+		cb_data->DstFile, cb_data->DstIndex, cb_data->AliveWriteMask);
+
+	if (shared_mask == RC_MASK_NONE)
+		return shared_mask;
+
+	/* If we make it this far, it means that this source reads from the
+	 * same register written to by d->ReaderData->Writer. */
+
+	read_mask = rc_swizzle_to_writemask(swizzle);
+	if (cb_data->ReaderData->AbortOnRead & read_mask) {
+		cb_data->ReaderData->Abort = 1;
+		return shared_mask;
+	}
+
+	if (cb_data->ReaderData->LoopDepth > 0) {
+		cb_data->ReaderData->AbortOnWrite |=
+				(read_mask & cb_data->AliveWriteMask);
+	}
+
+	/* XXX The behavior in this case should be configurable. */
+	if ((read_mask & cb_data->AliveWriteMask) != read_mask) {
+		cb_data->ReaderData->Abort = 1;
+		return shared_mask;
+	}
+
+	return shared_mask;
+}
+
+static void get_readers_pair_read_callback(
+	void * userdata,
+	struct rc_instruction * inst,
+	struct rc_pair_instruction_arg * arg,
+	struct rc_pair_instruction_source * src)
+{
+	unsigned int shared_mask;
+	struct get_readers_callback_data * d = userdata;
+
+	shared_mask = get_readers_read_callback(d,
+				0 /*Pair Instructions don't use RelAddr*/,
+				src->File, src->Index, arg->Swizzle);
+
+	if (shared_mask == RC_MASK_NONE)
+		return;
+
+	if (d->ReadPairCB)
+		d->ReadPairCB(d->ReaderData, inst, arg, src);
+
+	if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort)
+		return;
+
+	add_reader_pair(&d->C->Pool, d->ReaderData, inst, shared_mask, arg, src);
+}
+
+/**
+ * This function is used by rc_get_readers_normal() to determine whether inst
+ * is a reader of userdata->ReaderData->Writer
+ */
+static void get_readers_normal_read_callback(
+	void * userdata,
+	struct rc_instruction * inst,
+	struct rc_src_register * src)
+{
+	struct get_readers_callback_data * d = userdata;
+	unsigned int shared_mask;
+
+	shared_mask = get_readers_read_callback(d,
+			src->RelAddr, src->File, src->Index, src->Swizzle);
+
+	if (shared_mask == RC_MASK_NONE)
+		return;
+	/* The callback function could potentially clear d->ReaderData->Abort,
+	 * so we need to call it before we return. */
+	if (d->ReadNormalCB)
+		d->ReadNormalCB(d->ReaderData, inst, src);
+
+	if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort)
+		return;
+
+	add_reader_normal(&d->C->Pool, d->ReaderData, inst, shared_mask, src);
+}
+
+/**
+ * This function is used by rc_get_readers_normal() to determine when
+ * userdata->ReaderData->Writer is dead (i. e. All compontents of its
+ * destination register have been overwritten by other instructions).
+ */
+static void get_readers_write_callback(
+	void *userdata,
+	struct rc_instruction * inst,
+	rc_register_file file,
+	unsigned int index,
+	unsigned int mask)
+{
+	struct get_readers_callback_data * d = userdata;
+
+	if (index == d->DstIndex && file == d->DstFile) {
+		unsigned int shared_mask = mask & d->DstMask;
+		d->ReaderData->AbortOnRead &= ~shared_mask;
+		d->AliveWriteMask &= ~shared_mask;
+		if (d->ReaderData->AbortOnWrite & shared_mask) {
+			d->ReaderData->Abort = 1;
+		}
+	}
+
+	if(d->WriteCB)
+		d->WriteCB(d->ReaderData, inst, file, index, mask);
+}
+
+static void push_branch_mask(
+	struct get_readers_callback_data * d,
+	unsigned int * branch_depth)
+{
+	(*branch_depth)++;
+	if (*branch_depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) {
+		d->ReaderData->Abort = 1;
+		return;
+	}
+	d->BranchMasks[*branch_depth].IfWriteMask =
+					d->AliveWriteMask;
+}
+
+static void pop_branch_mask(
+	struct get_readers_callback_data * d,
+	unsigned int * branch_depth)
+{
+	struct branch_write_mask * masks = &d->BranchMasks[*branch_depth];
+
+	if (masks->HasElse) {
+		/* Abort on read for components that were written in the IF
+		 * block. */
+		d->ReaderData->AbortOnRead |=
+				masks->IfWriteMask & ~masks->ElseWriteMask;
+		/* Abort on read for components that were written in the ELSE
+		 * block. */
+		d->ReaderData->AbortOnRead |=
+				masks->ElseWriteMask & ~d->AliveWriteMask;
+
+		d->AliveWriteMask = masks->IfWriteMask
+			^ ((masks->IfWriteMask ^ masks->ElseWriteMask)
+			& (masks->IfWriteMask ^ d->AliveWriteMask));
+	} else {
+		d->ReaderData->AbortOnRead |=
+				masks->IfWriteMask & ~d->AliveWriteMask;
+		d->AliveWriteMask = masks->IfWriteMask;
+
+	}
+	memset(masks, 0, sizeof(struct branch_write_mask));
+	(*branch_depth)--;
+}
+
+static void get_readers_for_single_write(
+	void * userdata,
+	struct rc_instruction * writer,
+	rc_register_file dst_file,
+	unsigned int dst_index,
+	unsigned int dst_mask)
+{
+	struct rc_instruction * tmp;
+	unsigned int branch_depth = 0;
+	struct rc_instruction * endloop = NULL;
+	unsigned int abort_on_read_at_endloop = 0;
+	struct get_readers_callback_data * d = userdata;
+
+	d->ReaderData->Writer = writer;
+	d->ReaderData->AbortOnRead = 0;
+	d->ReaderData->AbortOnWrite = 0;
+	d->ReaderData->LoopDepth = 0;
+	d->ReaderData->InElse = 0;
+	d->DstFile = dst_file;
+	d->DstIndex = dst_index;
+	d->DstMask = dst_mask;
+	d->AliveWriteMask = dst_mask;
+	memset(d->BranchMasks, 0, sizeof(d->BranchMasks));
+
+	if (!dst_mask)
+		return;
+
+	for(tmp = writer->Next; tmp != &d->C->Program.Instructions;
+							tmp = tmp->Next){
+		rc_opcode opcode = rc_get_flow_control_inst(tmp);
+		switch(opcode) {
+		case RC_OPCODE_BGNLOOP:
+			d->ReaderData->LoopDepth++;
+			push_branch_mask(d, &branch_depth);
+			break;
+		case RC_OPCODE_ENDLOOP:
+			if (d->ReaderData->LoopDepth > 0) {
+				d->ReaderData->LoopDepth--;
+				if (d->ReaderData->LoopDepth == 0) {
+					d->ReaderData->AbortOnWrite = 0;
+				}
+				pop_branch_mask(d, &branch_depth);
+			} else {
+				/* Here we have reached an ENDLOOP without
+				 * seeing its BGNLOOP.  These means that
+				 * the writer was written inside of a loop,
+				 * so it could have readers that are above it
+				 * (i.e. they have a lower IP).  To find these
+				 * readers we jump to the BGNLOOP instruction
+				 * and check each instruction until we get
+				 * back to the writer.
+				 */
+				endloop = tmp;
+				tmp = rc_match_endloop(tmp);
+				if (!tmp) {
+					rc_error(d->C, "Failed to match endloop.\n");
+					d->ReaderData->Abort = 1;
+					return;
+				}
+				abort_on_read_at_endloop = d->ReaderData->AbortOnRead;
+				d->ReaderData->AbortOnRead |= d->AliveWriteMask;
+				continue;
+			}
+			break;
+		case RC_OPCODE_IF:
+			push_branch_mask(d, &branch_depth);
+			break;
+		case RC_OPCODE_ELSE:
+			if (branch_depth == 0) {
+				d->ReaderData->InElse = 1;
+			} else {
+				unsigned int temp_mask = d->AliveWriteMask;
+				d->AliveWriteMask =
+					d->BranchMasks[branch_depth].IfWriteMask;
+				d->BranchMasks[branch_depth].ElseWriteMask =
+								temp_mask;
+				d->BranchMasks[branch_depth].HasElse = 1;
+			}
+			break;
+		case RC_OPCODE_ENDIF:
+			if (branch_depth == 0) {
+				d->ReaderData->AbortOnRead = d->AliveWriteMask;
+				d->ReaderData->InElse = 0;
+			}
+			else {
+				pop_branch_mask(d, &branch_depth);
+			}
+			break;
+		default:
+			break;
+		}
+
+		if (d->ReaderData->InElse)
+			continue;
+
+		if (tmp->Type == RC_INSTRUCTION_NORMAL) {
+			rc_for_all_reads_src(tmp,
+				get_readers_normal_read_callback, d);
+		} else {
+			rc_pair_for_all_reads_arg(tmp,
+				get_readers_pair_read_callback, d);
+		}
+
+		/* This can happen when we jump from an ENDLOOP to BGNLOOP */
+		if (tmp == writer) {
+			tmp = endloop;
+			endloop = NULL;
+			d->ReaderData->AbortOnRead = abort_on_read_at_endloop;
+			continue;
+		}
+		rc_for_all_writes_mask(tmp, get_readers_write_callback, d);
+
+		if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort)
+			return;
+
+		if (branch_depth == 0 && !d->AliveWriteMask)
+			return;
+	}
+}
+
+static void init_get_readers_callback_data(
+	struct get_readers_callback_data * d,
+	struct rc_reader_data * reader_data,
+	struct radeon_compiler * c,
+	rc_read_src_fn read_normal_cb,
+	rc_pair_read_arg_fn read_pair_cb,
+	rc_read_write_mask_fn write_cb)
+{
+	reader_data->Abort = 0;
+	reader_data->ReaderCount = 0;
+	reader_data->ReadersReserved = 0;
+	reader_data->Readers = NULL;
+
+	d->C = c;
+	d->ReaderData = reader_data;
+	d->ReadNormalCB = read_normal_cb;
+	d->ReadPairCB = read_pair_cb;
+	d->WriteCB = write_cb;
+}
+
+/**
+ * This function will create a list of readers via the rc_reader_data struct.
+ * This function will abort (set the flag data->Abort) and return if it
+ * encounters an instruction that reads from @param writer and also a different
+ * instruction.  Here are some examples:
+ *
+ * writer = instruction 0;
+ * 0 MOV TEMP[0].xy, TEMP[1].xy
+ * 1 MOV TEMP[0].zw, TEMP[2].xy
+ * 2 MOV TEMP[3], TEMP[0]
+ * The Abort flag will be set on instruction 2, because it reads values written
+ * by instructions 0 and 1.
+ *
+ * writer = instruction 1;
+ * 0 IF TEMP[0].x
+ * 1 MOV TEMP[1], TEMP[2]
+ * 2 ELSE
+ * 3 MOV TEMP[1], TEMP[2]
+ * 4 ENDIF
+ * 5 MOV TEMP[3], TEMP[1]
+ * The Abort flag will be set on instruction 5, because it could read from the
+ * value written by either instruction 1 or 3, depending on the jump decision
+ * made at instruction 0.
+ *
+ * writer = instruction 0;
+ * 0 MOV TEMP[0], TEMP[1]
+ * 2 BGNLOOP
+ * 3 ADD TEMP[0], TEMP[0], none.1
+ * 4 ENDLOOP
+ * The Abort flag will be set on instruction 3, because in the first iteration
+ * of the loop it reads the value written by instruction 0 and in all other
+ * iterations it reads the value written by instruction 3.
+ *
+ * @param read_cb This function will be called for for every instruction that
+ * has been determined to be a reader of writer.
+ * @param write_cb This function will be called for every instruction after
+ * writer.
+ */
+void rc_get_readers(
+	struct radeon_compiler * c,
+	struct rc_instruction * writer,
+	struct rc_reader_data * data,
+	rc_read_src_fn read_normal_cb,
+	rc_pair_read_arg_fn read_pair_cb,
+	rc_read_write_mask_fn write_cb)
+{
+	struct get_readers_callback_data d;
+
+	init_get_readers_callback_data(&d, data, c, read_normal_cb,
+						read_pair_cb, write_cb);
+
+	rc_for_all_writes_mask(writer, get_readers_for_single_write, &d);
+}
+
+void rc_get_readers_sub(
+	struct radeon_compiler * c,
+	struct rc_instruction * writer,
+	struct rc_pair_sub_instruction * sub_writer,
+	struct rc_reader_data * data,
+	rc_read_src_fn read_normal_cb,
+	rc_pair_read_arg_fn read_pair_cb,
+	rc_read_write_mask_fn write_cb)
+{
+	struct get_readers_callback_data d;
+
+	init_get_readers_callback_data(&d, data, c, read_normal_cb,
+						read_pair_cb, write_cb);
+
+	if (sub_writer->WriteMask) {
+		get_readers_for_single_write(&d, writer, RC_FILE_TEMPORARY,
+			sub_writer->DestIndex, sub_writer->WriteMask);
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow.h b/src/gallium/drivers/r300/compiler/radeon_dataflow.h
new file mode 100644
index 00000000000..d8a627258ea
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_dataflow.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_DATAFLOW_H
+#define RADEON_DATAFLOW_H
+
+#include "radeon_program_constants.h"
+
+struct radeon_compiler;
+struct rc_instruction;
+struct rc_swizzle_caps;
+struct rc_src_register;
+struct rc_pair_instruction_arg;
+struct rc_pair_instruction_source;
+struct rc_pair_sub_instruction;
+struct rc_compiler;
+
+
+/**
+ * Help analyze and modify the register accesses of instructions.
+ */
+/*@{*/
+typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst,
+			rc_register_file file, unsigned int index, unsigned int chan);
+void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
+void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
+
+typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst,
+			rc_register_file file, unsigned int index, unsigned int mask);
+void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
+void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
+
+typedef void (*rc_read_src_fn)(void * userdata, struct rc_instruction * inst,
+			struct rc_src_register * src);
+void rc_for_all_reads_src(struct rc_instruction * inst, rc_read_src_fn cb,
+			void * userdata);
+
+typedef void (*rc_pair_read_arg_fn)(void * userdata,
+	struct rc_instruction * inst, struct rc_pair_instruction_arg * arg,
+	struct rc_pair_instruction_source * src);
+void rc_pair_for_all_reads_arg(struct rc_instruction * inst,
+					rc_pair_read_arg_fn cb, void * userdata);
+
+typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst,
+			rc_register_file * pfile, unsigned int * pindex);
+void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata);
+/*@}*/
+
+struct rc_reader {
+	struct rc_instruction * Inst;
+	unsigned int WriteMask;
+	union {
+		struct {
+			struct rc_src_register * Src;
+		} I;
+		struct {
+			struct rc_pair_instruction_arg * Arg;
+			struct rc_pair_instruction_source * Src;
+		} P;
+	} U;
+};
+
+struct rc_reader_data {
+	unsigned int Abort;
+	unsigned int AbortOnRead;
+	unsigned int AbortOnWrite;
+	unsigned int LoopDepth;
+	unsigned int InElse;
+	struct rc_instruction * Writer;
+
+	unsigned int ReaderCount;
+	unsigned int ReadersReserved;
+	struct rc_reader * Readers;
+
+	/* If this flag is enabled, rc_get_readers will exit as soon possbile
+	 * after the Abort flag is set.*/
+	unsigned int ExitOnAbort;
+	void * CbData;
+};
+
+void rc_get_readers(
+	struct radeon_compiler * c,
+	struct rc_instruction * writer,
+	struct rc_reader_data * data,
+	rc_read_src_fn read_normal_cb,
+	rc_pair_read_arg_fn read_pair_cb,
+	rc_read_write_mask_fn write_cb);
+
+void rc_get_readers_sub(
+	struct radeon_compiler * c,
+	struct rc_instruction * writer,
+	struct rc_pair_sub_instruction * sub_writer,
+	struct rc_reader_data * data,
+	rc_read_src_fn read_normal_cb,
+	rc_pair_read_arg_fn read_pair_cb,
+	rc_read_write_mask_fn write_cb);
+/**
+ * Compiler passes based on dataflow analysis.
+ */
+/*@{*/
+typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data,
+			void (*mark_fn)(void * data, unsigned int index, unsigned int mask));
+void rc_dataflow_deadcode(struct radeon_compiler * c, void *user);
+void rc_dataflow_swizzles(struct radeon_compiler * c, void *user);
+/*@}*/
+
+void rc_optimize(struct radeon_compiler * c, void *user);
+
+#endif /* RADEON_DATAFLOW_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c b/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c
new file mode 100644
index 00000000000..678e1475883
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_dataflow_deadcode.c
@@ -0,0 +1,359 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+
+
+struct updatemask_state {
+	unsigned char Output[RC_REGISTER_MAX_INDEX];
+	unsigned char Temporary[RC_REGISTER_MAX_INDEX];
+	unsigned char Address;
+	unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
+};
+
+struct instruction_state {
+	unsigned char WriteMask:4;
+	unsigned char WriteALUResult:1;
+	unsigned char SrcReg[3];
+};
+
+struct loopinfo {
+	struct updatemask_state * Breaks;
+	unsigned int BreakCount;
+	unsigned int BreaksReserved;
+};
+
+struct branchinfo {
+	unsigned int HaveElse:1;
+
+	struct updatemask_state StoreEndif;
+	struct updatemask_state StoreElse;
+};
+
+struct deadcode_state {
+	struct radeon_compiler * C;
+	struct instruction_state * Instructions;
+
+	struct updatemask_state R;
+
+	struct branchinfo * BranchStack;
+	unsigned int BranchStackSize;
+	unsigned int BranchStackReserved;
+
+	struct loopinfo * LoopStack;
+	unsigned int LoopStackSize;
+	unsigned int LoopStackReserved;
+};
+
+
+static void or_updatemasks(
+	struct updatemask_state * dst,
+	struct updatemask_state * a,
+	struct updatemask_state * b)
+{
+	for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) {
+		dst->Output[i] = a->Output[i] | b->Output[i];
+		dst->Temporary[i] = a->Temporary[i] | b->Temporary[i];
+	}
+
+	for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i)
+		dst->Special[i] = a->Special[i] | b->Special[i];
+
+	dst->Address = a->Address | b->Address;
+}
+
+static void push_break(struct deadcode_state *s)
+{
+	struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1];
+	memory_pool_array_reserve(&s->C->Pool, struct updatemask_state,
+		loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1);
+
+	memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R));
+}
+
+static void push_loop(struct deadcode_state * s)
+{
+	memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack,
+			s->LoopStackSize, s->LoopStackReserved, 1);
+	memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo));
+}
+
+static void push_branch(struct deadcode_state * s)
+{
+	struct branchinfo * branch;
+
+	memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
+			s->BranchStackSize, s->BranchStackReserved, 1);
+
+	branch = &s->BranchStack[s->BranchStackSize++];
+	branch->HaveElse = 0;
+	memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
+}
+
+static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
+{
+	if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) {
+		if (index >= RC_REGISTER_MAX_INDEX) {
+			rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file);
+			return 0;
+		}
+
+		if (file == RC_FILE_OUTPUT)
+			return &s->R.Output[index];
+		else
+			return &s->R.Temporary[index];
+	} else if (file == RC_FILE_ADDRESS) {
+		return &s->R.Address;
+	} else if (file == RC_FILE_SPECIAL) {
+		if (index >= RC_NUM_SPECIAL_REGISTERS) {
+			rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index);
+			return 0;
+		}
+
+		return &s->R.Special[index];
+	}
+
+	return 0;
+}
+
+static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
+{
+	unsigned char * pused = get_used_ptr(s, file, index);
+	if (pused)
+		*pused |= mask;
+}
+
+static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)
+{
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+	struct instruction_state * insts = &s->Instructions[inst->IP];
+	unsigned int usedmask = 0;
+	unsigned int srcmasks[3];
+
+	if (opcode->HasDstReg) {
+		unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
+		if (pused) {
+			usedmask = *pused & inst->U.I.DstReg.WriteMask;
+			*pused &= ~usedmask;
+		}
+	}
+
+	insts->WriteMask |= usedmask;
+
+	if (inst->U.I.WriteALUResult) {
+		unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
+		if (pused && *pused) {
+			if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
+				usedmask |= RC_MASK_X;
+			else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
+				usedmask |= RC_MASK_W;
+
+			*pused = 0;
+			insts->WriteALUResult = 1;
+		}
+	}
+
+	rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
+
+	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+		unsigned int refmask = 0;
+		unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
+		insts->SrcReg[src] |= newsrcmask;
+
+		for(unsigned int chan = 0; chan < 4; ++chan) {
+			if (GET_BIT(newsrcmask, chan))
+				refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
+		}
+
+		/* get rid of spurious bits from ZERO, ONE, etc. swizzles */
+		refmask &= RC_MASK_XYZW;
+
+		if (!refmask)
+			continue;
+
+		mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
+
+		if (inst->U.I.SrcReg[src].RelAddr)
+			mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
+	}
+}
+
+static void mark_output_use(void * data, unsigned int index, unsigned int mask)
+{
+	struct deadcode_state * s = data;
+
+	mark_used(s, RC_FILE_OUTPUT, index, mask);
+}
+
+void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
+{
+	struct deadcode_state s;
+	unsigned int nr_instructions;
+	rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user;
+	unsigned int ip;
+
+	memset(&s, 0, sizeof(s));
+	s.C = c;
+
+	nr_instructions = rc_recompute_ips(c);
+	s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);
+	memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);
+
+	dce(c, &s, &mark_output_use);
+
+	for(struct rc_instruction * inst = c->Program.Instructions.Prev;
+	    inst != &c->Program.Instructions;
+	    inst = inst->Prev) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		switch(opcode->Opcode){
+		/* Mark all sources in the loop body as used before doing
+		 * normal deadcode analysis.  This is probably not optimal.
+		 */
+		case RC_OPCODE_ENDLOOP:
+		{
+			int endloops = 1;
+			struct rc_instruction *ptr;
+			for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){
+				opcode = rc_get_opcode_info(ptr->U.I.Opcode);
+				if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
+					endloops--;
+					continue;
+				}
+				if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){
+					endloops++;
+					continue;
+				}
+				if(opcode->HasDstReg){
+					int src = 0;
+					unsigned int srcmasks[3];
+					rc_compute_sources_for_writemask(ptr,
+						ptr->U.I.DstReg.WriteMask, srcmasks);
+					for(src=0; src < opcode->NumSrcRegs; src++){
+						mark_used(&s,
+							ptr->U.I.SrcReg[src].File,
+							ptr->U.I.SrcReg[src].Index,
+							srcmasks[src]);
+					}
+				}
+			}
+			push_loop(&s);
+			break;
+		}
+		case RC_OPCODE_BRK:
+			push_break(&s);
+			break;
+		case RC_OPCODE_BGNLOOP:
+		{
+			unsigned int i;
+			struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1];
+			for(i = 0; i < loop->BreakCount; i++) {
+				or_updatemasks(&s.R, &s.R, &loop->Breaks[i]);
+			}
+			break;
+		}
+		case RC_OPCODE_CONT:
+			break;
+		case RC_OPCODE_ENDIF:
+			push_branch(&s);
+			break;
+		default:
+			if (opcode->IsFlowControl && s.BranchStackSize) {
+				struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
+				if (opcode->Opcode == RC_OPCODE_IF) {
+					or_updatemasks(&s.R,
+							&s.R,
+							branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
+
+					s.BranchStackSize--;
+				} else if (opcode->Opcode == RC_OPCODE_ELSE) {
+					if (branch->HaveElse) {
+						rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
+					} else {
+						memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
+						memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
+						branch->HaveElse = 1;
+					}
+				} else {
+					rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
+				}
+			}
+		}
+
+		update_instruction(&s, inst);
+	}
+
+	ip = 0;
+	for(struct rc_instruction * inst = c->Program.Instructions.Next;
+	    inst != &c->Program.Instructions;
+	    inst = inst->Next, ++ip) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+		int dead = 1;
+		unsigned int srcmasks[3];
+		unsigned int usemask;
+
+		if (!opcode->HasDstReg) {
+			dead = 0;
+		} else {
+			inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
+			if (s.Instructions[ip].WriteMask)
+				dead = 0;
+
+			if (s.Instructions[ip].WriteALUResult)
+				dead = 0;
+			else
+				inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
+		}
+
+		if (dead) {
+			struct rc_instruction * todelete = inst;
+			inst = inst->Prev;
+			rc_remove_instruction(todelete);
+			continue;
+		}
+
+		usemask = s.Instructions[ip].WriteMask;
+
+		if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
+			usemask |= RC_MASK_X;
+		else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
+			usemask |= RC_MASK_W;
+
+		rc_compute_sources_for_writemask(inst, usemask, srcmasks);
+
+		for(unsigned int src = 0; src < 3; ++src) {
+			for(unsigned int chan = 0; chan < 4; ++chan) {
+				if (!GET_BIT(srcmasks[src], chan))
+					SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
+			}
+		}
+	}
+
+	rc_calculate_inputs_outputs(c);
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c
new file mode 100644
index 00000000000..133a9f72ec7
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+#include "radeon_swizzle.h"
+
+
+static void rewrite_source(struct radeon_compiler * c,
+		struct rc_instruction * inst, unsigned src)
+{
+	struct rc_swizzle_split split;
+	unsigned int tempreg = rc_find_free_temporary(c);
+	unsigned int usemask;
+
+	usemask = 0;
+	for(unsigned int chan = 0; chan < 4; ++chan) {
+		if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED)
+			usemask |= 1 << chan;
+	}
+
+	c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split);
+
+	for(unsigned int phase = 0; phase < split.NumPhases; ++phase) {
+		struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev);
+		unsigned int phase_refmask;
+		unsigned int masked_negate;
+
+		mov->U.I.Opcode = RC_OPCODE_MOV;
+		mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		mov->U.I.DstReg.Index = tempreg;
+		mov->U.I.DstReg.WriteMask = split.Phase[phase];
+		mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
+		mov->U.I.PreSub = inst->U.I.PreSub;
+
+		phase_refmask = 0;
+		for(unsigned int chan = 0; chan < 4; ++chan) {
+			if (!GET_BIT(split.Phase[phase], chan))
+				SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED);
+			else
+				phase_refmask |= 1 << GET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan);
+		}
+
+		phase_refmask &= RC_MASK_XYZW;
+
+		masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate;
+		if (masked_negate == 0)
+			mov->U.I.SrcReg[0].Negate = 0;
+		else if (masked_negate == split.Phase[phase])
+			mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
+
+	}
+
+	inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY;
+	inst->U.I.SrcReg[src].Index = tempreg;
+	inst->U.I.SrcReg[src].Swizzle = 0;
+	inst->U.I.SrcReg[src].Negate = RC_MASK_NONE;
+	inst->U.I.SrcReg[src].Abs = 0;
+	for(unsigned int chan = 0; chan < 4; ++chan) {
+		SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
+				GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED);
+	}
+}
+
+void rc_dataflow_swizzles(struct radeon_compiler * c, void *user)
+{
+	struct rc_instruction * inst;
+
+	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+		unsigned int src;
+
+		for(src = 0; src < opcode->NumSrcRegs; ++src) {
+			if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
+				rewrite_source(c, inst, src);
+		}
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_branches.c b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.c
new file mode 100644
index 00000000000..7bede344f30
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.c
@@ -0,0 +1,342 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_emulate_branches.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+
+struct proxy_info {
+	unsigned int Proxied:1;
+	unsigned int Index:RC_REGISTER_INDEX_BITS;
+};
+
+struct register_proxies {
+	struct proxy_info Temporary[RC_REGISTER_MAX_INDEX];
+};
+
+struct branch_info {
+	struct rc_instruction * If;
+	struct rc_instruction * Else;
+};
+
+struct emulate_branch_state {
+	struct radeon_compiler * C;
+
+	struct branch_info * Branches;
+	unsigned int BranchCount;
+	unsigned int BranchReserved;
+};
+
+
+static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+	struct branch_info * branch;
+	struct rc_instruction * inst_mov;
+
+	memory_pool_array_reserve(&s->C->Pool, struct branch_info,
+			s->Branches, s->BranchCount, s->BranchReserved, 1);
+
+	DBG("%s\n", __FUNCTION__);
+
+	branch = &s->Branches[s->BranchCount++];
+	memset(branch, 0, sizeof(struct branch_info));
+	branch->If = inst;
+
+	/* Make a safety copy of the decision register, because we will need
+	 * it at ENDIF time and it might be overwritten in both branches. */
+	inst_mov = rc_insert_new_instruction(s->C, inst->Prev);
+	inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+	inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	inst_mov->U.I.DstReg.Index = rc_find_free_temporary(s->C);
+	inst_mov->U.I.DstReg.WriteMask = RC_MASK_X;
+	inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+	inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
+	inst->U.I.SrcReg[0].Swizzle = 0;
+	inst->U.I.SrcReg[0].Abs = 0;
+	inst->U.I.SrcReg[0].Negate = 0;
+}
+
+static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+	struct branch_info * branch;
+
+	if (!s->BranchCount) {
+		rc_error(s->C, "Encountered ELSE outside of branches");
+		return;
+	}
+
+	DBG("%s\n", __FUNCTION__);
+
+	branch = &s->Branches[s->BranchCount - 1];
+	branch->Else = inst;
+}
+
+
+struct state_and_proxies {
+	struct emulate_branch_state * S;
+	struct register_proxies * Proxies;
+};
+
+static struct proxy_info * get_proxy_info(struct state_and_proxies * sap,
+			rc_register_file file, unsigned int index)
+{
+	if (file == RC_FILE_TEMPORARY) {
+		return &sap->Proxies->Temporary[index];
+	} else {
+		return 0;
+	}
+}
+
+static void scan_write(void * userdata, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int comp)
+{
+	struct state_and_proxies * sap = userdata;
+	struct proxy_info * proxy = get_proxy_info(sap, file, index);
+
+	if (proxy && !proxy->Proxied) {
+		proxy->Proxied = 1;
+		proxy->Index = rc_find_free_temporary(sap->S->C);
+	}
+}
+
+static void remap_proxy_function(void * userdata, struct rc_instruction * inst,
+		rc_register_file * pfile, unsigned int * pindex)
+{
+	struct state_and_proxies * sap = userdata;
+	struct proxy_info * proxy = get_proxy_info(sap, *pfile, *pindex);
+
+	if (proxy && proxy->Proxied) {
+		*pfile = RC_FILE_TEMPORARY;
+		*pindex = proxy->Index;
+	}
+}
+
+/**
+ * Redirect all writes in the instruction range [begin, end) to proxy
+ * temporary registers.
+ */
+static void allocate_and_insert_proxies(struct emulate_branch_state * s,
+		struct register_proxies * proxies,
+		struct rc_instruction * begin,
+		struct rc_instruction * end)
+{
+	struct state_and_proxies sap;
+
+	sap.S = s;
+	sap.Proxies = proxies;
+
+	for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
+		rc_for_all_writes_mask(inst, scan_write, &sap);
+		rc_remap_registers(inst, remap_proxy_function, &sap);
+	}
+
+	for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
+		if (proxies->Temporary[index].Proxied) {
+			struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, begin->Prev);
+			inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+			inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_mov->U.I.DstReg.Index = proxies->Temporary[index].Index;
+			inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+			inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+			inst_mov->U.I.SrcReg[0].Index = index;
+		}
+	}
+}
+
+
+static void inject_cmp(struct emulate_branch_state * s,
+		struct rc_instruction * inst_if,
+		struct rc_instruction * inst_endif,
+		rc_register_file file, unsigned int index,
+		struct proxy_info ifproxy,
+		struct proxy_info elseproxy)
+{
+	struct rc_instruction * inst_cmp = rc_insert_new_instruction(s->C, inst_endif);
+	inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
+	inst_cmp->U.I.DstReg.File = file;
+	inst_cmp->U.I.DstReg.Index = index;
+	inst_cmp->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+	inst_cmp->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0];
+	inst_cmp->U.I.SrcReg[0].Abs = 1;
+	inst_cmp->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
+	inst_cmp->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+	inst_cmp->U.I.SrcReg[1].Index = ifproxy.Proxied ? ifproxy.Index : index;
+	inst_cmp->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
+	inst_cmp->U.I.SrcReg[2].Index = elseproxy.Proxied ? elseproxy.Index : index;
+}
+
+static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+	struct branch_info * branch;
+	struct register_proxies IfProxies;
+	struct register_proxies ElseProxies;
+
+	if (!s->BranchCount) {
+		rc_error(s->C, "Encountered ENDIF outside of branches");
+		return;
+	}
+
+	DBG("%s\n", __FUNCTION__);
+
+	branch = &s->Branches[s->BranchCount - 1];
+
+	memset(&IfProxies, 0, sizeof(IfProxies));
+	memset(&ElseProxies, 0, sizeof(ElseProxies));
+
+	allocate_and_insert_proxies(s, &IfProxies, branch->If->Next, branch->Else ? branch->Else : inst);
+
+	if (branch->Else)
+		allocate_and_insert_proxies(s, &ElseProxies, branch->Else->Next, inst);
+
+	/* Insert the CMP instructions at the end. */
+	for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
+		if (IfProxies.Temporary[index].Proxied || ElseProxies.Temporary[index].Proxied) {
+			inject_cmp(s, branch->If, inst, RC_FILE_TEMPORARY, index,
+					IfProxies.Temporary[index], ElseProxies.Temporary[index]);
+		}
+	}
+
+	/* Remove all traces of the branch instructions */
+	rc_remove_instruction(branch->If);
+	if (branch->Else)
+		rc_remove_instruction(branch->Else);
+	rc_remove_instruction(inst);
+
+	s->BranchCount--;
+
+	if (VERBOSE) {
+		DBG("Program after ENDIF handling:\n");
+		rc_print_program(&s->C->Program);
+	}
+}
+
+
+struct remap_output_data {
+	unsigned int Output:RC_REGISTER_INDEX_BITS;
+	unsigned int Temporary:RC_REGISTER_INDEX_BITS;
+};
+
+static void remap_output_function(void * userdata, struct rc_instruction * inst,
+		rc_register_file * pfile, unsigned int * pindex)
+{
+	struct remap_output_data * data = userdata;
+
+	if (*pfile == RC_FILE_OUTPUT && *pindex == data->Output) {
+		*pfile = RC_FILE_TEMPORARY;
+		*pindex = data->Temporary;
+	}
+}
+
+
+/**
+ * Output registers cannot be read from and so cannot be dealt with like
+ * temporary registers.
+ *
+ * We do the simplest thing: If an output registers is written within
+ * a branch, then *all* writes to this register are proxied to a
+ * temporary register, and a final MOV is appended to the end of
+ * the program.
+ */
+static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+	const struct rc_opcode_info * opcode;
+
+	if (!s->BranchCount)
+		return;
+
+	opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+	if (!opcode->HasDstReg)
+		return;
+
+	if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) {
+		struct remap_output_data remap;
+		struct rc_instruction * inst_mov;
+
+		remap.Output = inst->U.I.DstReg.Index;
+		remap.Temporary = rc_find_free_temporary(s->C);
+
+		for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
+		    inst != &s->C->Program.Instructions;
+		    inst = inst->Next) {
+			rc_remap_registers(inst, &remap_output_function, &remap);
+		}
+
+		inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev);
+		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+		inst_mov->U.I.DstReg.File = RC_FILE_OUTPUT;
+		inst_mov->U.I.DstReg.Index = remap.Output;
+		inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+		inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+		inst_mov->U.I.SrcReg[0].Index = remap.Temporary;
+	}
+}
+
+/**
+ * Remove branch instructions; instead, execute both branches
+ * on different register sets and choose between their results
+ * using CMP instructions in place of the original ENDIF.
+ */
+void rc_emulate_branches(struct radeon_compiler *c, void *user)
+{
+	struct emulate_branch_state s;
+	struct rc_instruction * ptr;
+
+	memset(&s, 0, sizeof(s));
+	s.C = c;
+
+	/* Untypical loop because we may remove the current instruction */
+	ptr = c->Program.Instructions.Next;
+	while(ptr != &c->Program.Instructions) {
+		struct rc_instruction * inst = ptr;
+		ptr = ptr->Next;
+
+		if (inst->Type == RC_INSTRUCTION_NORMAL) {
+			switch(inst->U.I.Opcode) {
+			case RC_OPCODE_IF:
+				handle_if(&s, inst);
+				break;
+			case RC_OPCODE_ELSE:
+				handle_else(&s, inst);
+				break;
+			case RC_OPCODE_ENDIF:
+				handle_endif(&s, inst);
+				break;
+			default:
+				fix_output_writes(&s, inst);
+				break;
+			}
+		} else {
+			rc_error(c, "%s: unhandled instruction type\n", __FUNCTION__);
+		}
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_branches.h b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.h
new file mode 100644
index 00000000000..818ab84d0cd
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_emulate_branches.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_EMULATE_BRANCHES_H
+#define RADEON_EMULATE_BRANCHES_H
+
+struct radeon_compiler;
+
+void rc_emulate_branches(struct radeon_compiler *c, void *user);
+
+#endif /* RADEON_EMULATE_BRANCHES_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c
new file mode 100644
index 00000000000..205eecd1129
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.c
@@ -0,0 +1,522 @@
+/*
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ */
+
+#include "radeon_emulate_loops.h"
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+struct const_value {
+	struct radeon_compiler * C;
+	struct rc_src_register * Src;
+	float Value;
+	int HasValue;
+};
+
+struct count_inst {
+	struct radeon_compiler * C;
+	int Index;
+	rc_swizzle Swz;
+	float Amount;
+	int Unknown;
+};
+
+static float get_constant_value(struct radeon_compiler * c,
+						struct rc_src_register * src,
+						int chan)
+{
+	float base = 1.0f;
+	int swz = GET_SWZ(src->Swizzle, chan);
+	if(swz >= 4 || src->Index >= c->Program.Constants.Count ){
+		rc_error(c, "get_constant_value: Can't find a value.\n");
+		return 0.0f;
+	}
+	if(GET_BIT(src->Negate, chan)){
+		base = -1.0f;
+	}
+	return base *
+		c->Program.Constants.Constants[src->Index].u.Immediate[swz];
+}
+
+static int src_reg_is_immediate(struct rc_src_register * src,
+						struct radeon_compiler * c)
+{
+	return src->File == RC_FILE_CONSTANT &&
+	c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE;
+}
+
+static unsigned int loop_max_possible_iterations(struct radeon_compiler *c,
+			struct loop_info * loop)
+{
+	unsigned int total_i = rc_recompute_ips(c);
+	unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1;
+	/* +1 because the program already has one iteration of the loop. */
+	return 1 + ((c->max_alu_insts - total_i) / loop_i);
+}
+
+static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
+						unsigned int iterations)
+{
+	unsigned int i;
+	struct rc_instruction * ptr;
+	struct rc_instruction * first = loop->BeginLoop->Next;
+	struct rc_instruction * last = loop->EndLoop->Prev;
+	struct rc_instruction * append_to = last;
+	rc_remove_instruction(loop->BeginLoop);
+	rc_remove_instruction(loop->EndLoop);
+	for( i = 1; i < iterations; i++){
+		for(ptr = first; ptr != last->Next; ptr = ptr->Next){
+			struct rc_instruction *new = rc_alloc_instruction(c);
+			memcpy(new, ptr, sizeof(struct rc_instruction));
+			rc_insert_instruction(append_to, new);
+			append_to = new;
+		}
+	}
+}
+
+
+static void update_const_value(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int mask)
+{
+	struct const_value * value = data;
+	if(value->Src->File != file ||
+	   value->Src->Index != index ||
+	   !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){
+		return;
+	}
+	switch(inst->U.I.Opcode){
+	case RC_OPCODE_MOV:
+		if(!src_reg_is_immediate(&inst->U.I.SrcReg[0], value->C)){
+			return;
+		}
+		value->HasValue = 1;
+		value->Value =
+			get_constant_value(value->C, &inst->U.I.SrcReg[0], 0);
+		break;
+	}
+}
+
+static void get_incr_amount(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int mask)
+{
+	struct count_inst * count_inst = data;
+	int amnt_src_index;
+	const struct rc_opcode_info * opcode;
+	float amount;
+
+	if(file != RC_FILE_TEMPORARY ||
+	   count_inst->Index != index ||
+	   (1 << GET_SWZ(count_inst->Swz,0) != mask)){
+		return;
+	}
+	/* Find the index of the counter register. */
+	opcode = rc_get_opcode_info(inst->U.I.Opcode);
+	if(opcode->NumSrcRegs != 2){
+		count_inst->Unknown = 1;
+		return;
+	}
+	if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY &&
+	   inst->U.I.SrcReg[0].Index == count_inst->Index &&
+	   inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){
+		amnt_src_index = 1;
+	} else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY &&
+		   inst->U.I.SrcReg[1].Index == count_inst->Index &&
+		   inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){
+		amnt_src_index = 0;
+	}
+	else{
+		count_inst->Unknown = 1;
+		return;
+	}
+	if(src_reg_is_immediate(&inst->U.I.SrcReg[amnt_src_index],
+							count_inst->C)){
+		amount = get_constant_value(count_inst->C,
+				&inst->U.I.SrcReg[amnt_src_index], 0);
+	}
+	else{
+		count_inst->Unknown = 1 ;
+		return;
+	}
+	switch(inst->U.I.Opcode){
+	case RC_OPCODE_ADD:
+		count_inst->Amount += amount;
+		break;
+	case RC_OPCODE_SUB:
+		if(amnt_src_index == 0){
+			count_inst->Unknown = 0;
+			return;
+		}
+		count_inst->Amount -= amount;
+		break;
+	default:
+		count_inst->Unknown = 1;
+		return;
+	}
+}
+
+/**
+ * If c->max_alu_inst is -1, then all eligible loops will be unrolled regardless
+ * of how many iterations they have.
+ */
+static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop)
+{
+	int end_loops;
+	int iterations;
+	struct count_inst count_inst;
+	float limit_value;
+	struct rc_src_register * counter;
+	struct rc_src_register * limit;
+	struct const_value counter_value;
+	struct rc_instruction * inst;
+
+	/* Find the counter and the upper limit */
+
+	if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], c)){
+		limit = &loop->Cond->U.I.SrcReg[0];
+		counter = &loop->Cond->U.I.SrcReg[1];
+	}
+	else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], c)){
+		limit = &loop->Cond->U.I.SrcReg[1];
+		counter = &loop->Cond->U.I.SrcReg[0];
+	}
+	else{
+		DBG("No constant limit.\n");
+		return 0;
+	}
+
+	/* Find the initial value of the counter */
+	counter_value.Src = counter;
+	counter_value.Value = 0.0f;
+	counter_value.HasValue = 0;
+	counter_value.C = c;
+	for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop;
+							inst = inst->Next){
+		rc_for_all_writes_mask(inst, update_const_value, &counter_value);
+	}
+	if(!counter_value.HasValue){
+		DBG("Initial counter value cannot be determined.\n");
+		return 0;
+	}
+	DBG("Initial counter value is %f\n", counter_value.Value);
+	/* Determine how the counter is modified each loop */
+	count_inst.C = c;
+	count_inst.Index = counter->Index;
+	count_inst.Swz = counter->Swizzle;
+	count_inst.Amount = 0.0f;
+	count_inst.Unknown = 0;
+	end_loops = 1;
+	for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){
+		switch(inst->U.I.Opcode){
+		/* XXX In the future we might want to try to unroll nested
+		 * loops here.*/
+		case RC_OPCODE_BGNLOOP:
+			end_loops++;
+			break;
+		case RC_OPCODE_ENDLOOP:
+			loop->EndLoop = inst;
+			end_loops--;
+			break;
+		case RC_OPCODE_BRK:
+			/* Don't unroll loops if it has a BRK instruction
+			 * other one used when testing the main conditional
+			 * of the loop. */
+
+			/* Make sure we haven't entered a nested loops. */
+			if(inst != loop->Brk && end_loops == 1) {
+				return 0;
+			}
+			break;
+		/* XXX Check if the counter is modified within an if statement.
+		 */
+		case RC_OPCODE_IF:
+			break;
+		default:
+			rc_for_all_writes_mask(inst, get_incr_amount, &count_inst);
+			if(count_inst.Unknown){
+				return 0;
+			}
+			break;
+		}
+	}
+	/* Infinite loop */
+	if(count_inst.Amount == 0.0f){
+		return 0;
+	}
+	DBG("Counter is increased by %f each iteration.\n", count_inst.Amount);
+	/* Calculate the number of iterations of this loop.  Keeping this
+	 * simple, since we only support increment and decrement loops.
+	 */
+	limit_value = get_constant_value(c, limit, 0);
+	DBG("Limit is %f.\n", limit_value);
+	/* The iteration calculations are opposite of what you would expect.
+	 * In a normal loop, if the condition is met, then loop continues, but
+	 * with our loops, if the condition is met, the is exited. */
+	switch(loop->Cond->U.I.Opcode){
+	case RC_OPCODE_SGE:
+	case RC_OPCODE_SLE:
+		iterations = (int) ceilf((limit_value - counter_value.Value) /
+							count_inst.Amount);
+		break;
+
+	case RC_OPCODE_SGT:
+	case RC_OPCODE_SLT:
+		iterations = (int) floorf((limit_value - counter_value.Value) /
+							count_inst.Amount) + 1;
+		break;
+	default:
+		return 0;
+	}
+
+	if (c->max_alu_insts > 0
+		&& iterations > loop_max_possible_iterations(c, loop)) {
+		return 0;
+	}
+
+	DBG("Loop will have %d iterations.\n", iterations);
+
+	/* Prepare loop for unrolling */
+	rc_remove_instruction(loop->Cond);
+	rc_remove_instruction(loop->If);
+	rc_remove_instruction(loop->Brk);
+	rc_remove_instruction(loop->EndIf);
+
+	unroll_loop(c, loop, iterations);
+	loop->EndLoop = NULL;
+	return 1;
+}
+
+/**
+ * @param c
+ * @param loop
+ * @param inst A pointer to a BGNLOOP instruction.
+ * @return 1 if all of the members of loop where set.
+ * @return 0 if there was an error and some members of loop are still NULL.
+ */
+static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop,
+						struct rc_instruction * inst)
+{
+	struct rc_instruction * ptr;
+
+	if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){
+		rc_error(c, "%s: expected BGNLOOP", __FUNCTION__);
+		return 0;
+	}
+
+	memset(loop, 0, sizeof(struct loop_info));
+
+	loop->BeginLoop = inst;
+
+	for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) {
+
+		if (ptr == &c->Program.Instructions) {
+			rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n",
+								__FUNCTION__);
+			return 0;
+		}
+
+		switch(ptr->U.I.Opcode){
+		case RC_OPCODE_BGNLOOP:
+		{
+			/* Nested loop, skip ahead to the end. */
+			unsigned int loop_depth = 1;
+			for(ptr = ptr->Next; ptr != &c->Program.Instructions;
+							ptr = ptr->Next){
+				if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+					loop_depth++;
+				} else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
+					if (!--loop_depth) {
+						break;
+					}
+				}
+			}
+			if (ptr == &c->Program.Instructions) {
+				rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n",
+								__FUNCTION__);
+					return 0;
+			}
+			break;
+		}
+		case RC_OPCODE_BRK:
+			if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF
+					|| ptr->Prev->U.I.Opcode != RC_OPCODE_IF
+					|| loop->Brk){
+				continue;
+			}
+			loop->Brk = ptr;
+			loop->If = ptr->Prev;
+			loop->EndIf = ptr->Next;
+			switch(loop->If->Prev->U.I.Opcode){
+			case RC_OPCODE_SLT:
+			case RC_OPCODE_SGE:
+			case RC_OPCODE_SGT:
+			case RC_OPCODE_SLE:
+			case RC_OPCODE_SEQ:
+			case RC_OPCODE_SNE:
+				break;
+			default:
+				return 0;
+			}
+			loop->Cond = loop->If->Prev;
+			break;
+
+		case RC_OPCODE_ENDLOOP:
+			loop->EndLoop = ptr;
+			break;
+		}
+	}
+
+	if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf
+					&& loop->Cond && loop->EndLoop) {
+		return 1;
+	}
+	return 0;
+}
+
+/**
+ * This function prepares a loop to be unrolled by converting it into an if
+ * statement.  Here is an outline of the conversion process:
+ * BGNLOOP;                         	-> BGNLOOP;
+ * <Additional conditional code>	-> <Additional conditional code>
+ * SGE/SLT temp[0], temp[1], temp[2];	-> SLT/SGE temp[0], temp[1], temp[2];
+ * IF temp[0];                      	-> IF temp[0];
+ * BRK;                             	->
+ * ENDIF;                           	-> <Loop Body>
+ * <Loop Body>                      	-> ENDIF;
+ * ENDLOOP;                         	-> ENDLOOP
+ *
+ * @param inst A pointer to a BGNLOOP instruction.
+ * @return 1 for success, 0 for failure
+ */
+static int transform_loop(struct emulate_loop_state * s,
+						struct rc_instruction * inst)
+{
+	struct loop_info * loop;
+
+	memory_pool_array_reserve(&s->C->Pool, struct loop_info,
+			s->Loops, s->LoopCount, s->LoopReserved, 1);
+
+	loop = &s->Loops[s->LoopCount++];
+
+	if (!build_loop_info(s->C, loop, inst)) {
+		rc_error(s->C, "Failed to build loop info\n");
+		return 0;
+	}
+
+	if(try_unroll_loop(s->C, loop)){
+		return 1;
+	}
+
+	/* Reverse the conditional instruction */
+	switch(loop->Cond->U.I.Opcode){
+	case RC_OPCODE_SGE:
+		loop->Cond->U.I.Opcode = RC_OPCODE_SLT;
+		break;
+	case RC_OPCODE_SLT:
+		loop->Cond->U.I.Opcode = RC_OPCODE_SGE;
+		break;
+	case RC_OPCODE_SLE:
+		loop->Cond->U.I.Opcode = RC_OPCODE_SGT;
+		break;
+	case RC_OPCODE_SGT:
+		loop->Cond->U.I.Opcode = RC_OPCODE_SLE;
+		break;
+	case RC_OPCODE_SEQ:
+		loop->Cond->U.I.Opcode = RC_OPCODE_SNE;
+		break;
+	case RC_OPCODE_SNE:
+		loop->Cond->U.I.Opcode = RC_OPCODE_SEQ;
+		break;
+	default:
+		rc_error(s->C, "loop->Cond is not a conditional.\n");
+		return 0;
+	}
+
+	/* Prepare the loop to be emulated */
+	rc_remove_instruction(loop->Brk);
+	rc_remove_instruction(loop->EndIf);
+	rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf);
+	return 1;
+}
+
+void rc_transform_loops(struct radeon_compiler *c, void *user)
+{
+	struct emulate_loop_state * s = &c->loop_state;
+	struct rc_instruction * ptr;
+
+	memset(s, 0, sizeof(struct emulate_loop_state));
+	s->C = c;
+	for(ptr = s->C->Program.Instructions.Next;
+			ptr != &s->C->Program.Instructions; ptr = ptr->Next) {
+		if(ptr->Type == RC_INSTRUCTION_NORMAL &&
+					ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
+			if (!transform_loop(s, ptr))
+				return;
+		}
+	}
+}
+
+void rc_unroll_loops(struct radeon_compiler *c, void *user)
+{
+	struct rc_instruction * inst;
+	struct loop_info loop;
+
+	for(inst = c->Program.Instructions.Next;
+			inst != &c->Program.Instructions; inst = inst->Next) {
+
+		if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+			if (build_loop_info(c, &loop, inst)) {
+				try_unroll_loop(c, &loop);
+			}
+		}
+	}
+}
+
+void rc_emulate_loops(struct radeon_compiler *c, void *user)
+{
+	struct emulate_loop_state * s = &c->loop_state;
+	int i;
+	/* Iterate backwards of the list of loops so that loops that nested
+	 * loops are unrolled first.
+	 */
+	for( i = s->LoopCount - 1; i >= 0; i-- ){
+		unsigned int iterations;
+
+		if(!s->Loops[i].EndLoop){
+			continue;
+		}
+		iterations = loop_max_possible_iterations(s->C, &s->Loops[i]);
+		unroll_loop(s->C, &s->Loops[i], iterations);
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h
new file mode 100644
index 00000000000..cd800c059d9
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_emulate_loops.h
@@ -0,0 +1,32 @@
+
+
+#ifndef RADEON_EMULATE_LOOPS_H
+#define RADEON_EMULATE_LOOPS_H
+
+#define MAX_ITERATIONS 8
+
+struct radeon_compiler;
+
+struct loop_info {
+	struct rc_instruction * BeginLoop;
+	struct rc_instruction * Cond;
+	struct rc_instruction * If;
+	struct rc_instruction * Brk;
+	struct rc_instruction * EndIf;
+	struct rc_instruction * EndLoop;
+};
+
+struct emulate_loop_state {
+	struct radeon_compiler * C;
+	struct loop_info * Loops;
+	unsigned int LoopCount;
+	unsigned int LoopReserved;
+};
+
+void rc_transform_loops(struct radeon_compiler *c, void *user);
+
+void rc_unroll_loops(struct radeon_compiler * c, void *user);
+
+void rc_emulate_loops(struct radeon_compiler * c, void *user);
+
+#endif /* RADEON_EMULATE_LOOPS_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_list.c b/src/gallium/drivers/r300/compiler/radeon_list.c
new file mode 100644
index 00000000000..811c908a81a
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_list.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_list.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "memory_pool.h"
+
+struct rc_list * rc_list(struct memory_pool * pool, void * item)
+{
+	struct rc_list * new = memory_pool_malloc(pool, sizeof(struct rc_list));
+	new->Item = item;
+	new->Next = NULL;
+	new->Prev = NULL;
+
+	return new;
+}
+
+void rc_list_add(struct rc_list ** list, struct rc_list * new_value)
+{
+	struct rc_list * temp;
+
+	if (*list == NULL) {
+		*list = new_value;
+		return;
+	}
+
+	for (temp = *list; temp->Next; temp = temp->Next);
+
+	temp->Next = new_value;
+	new_value->Prev = temp;
+}
+
+void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value)
+{
+	if (*list == rm_value) {
+		*list = rm_value->Next;
+		return;
+	}
+
+	rm_value->Prev->Next = rm_value->Next;
+	if (rm_value->Next) {
+		rm_value->Next->Prev = rm_value->Prev;
+	}
+}
+
+unsigned int rc_list_count(struct rc_list * list)
+{
+	unsigned int count = 0;
+	while (list) {
+		count++;
+		list = list->Next;
+	}
+	return count;
+}
+
+void rc_list_print(struct rc_list * list)
+{
+	while(list) {
+		fprintf(stderr, "%p->", list->Item);
+		list = list->Next;
+	}
+	fprintf(stderr, "\n");
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_list.h b/src/gallium/drivers/r300/compiler/radeon_list.h
new file mode 100644
index 00000000000..b3c8f89cc68
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_list.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_LIST_H
+#define RADEON_LIST_H
+
+struct memory_pool;
+
+struct rc_list {
+	void * Item;
+	struct rc_list * Prev;
+	struct rc_list * Next;
+};
+
+struct rc_list * rc_list(struct memory_pool * pool, void * item);
+void rc_list_add(struct rc_list ** list, struct rc_list * new_value);
+void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value);
+unsigned int rc_list_count(struct rc_list * list);
+void rc_list_print(struct rc_list * list);
+
+#endif /* RADEON_LIST_H */
+
diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.c b/src/gallium/drivers/r300/compiler/radeon_opcodes.c
new file mode 100644
index 00000000000..afd78ad79dd
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.c
@@ -0,0 +1,546 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_opcodes.h"
+#include "radeon_program.h"
+
+#include "radeon_program_constants.h"
+
+struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
+	{
+		.Opcode = RC_OPCODE_NOP,
+		.Name = "NOP"
+	},
+	{
+		.Opcode = RC_OPCODE_ILLEGAL_OPCODE,
+		.Name = "ILLEGAL OPCODE"
+	},
+	{
+		.Opcode = RC_OPCODE_ABS,
+		.Name = "ABS",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_ADD,
+		.Name = "ADD",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_ARL,
+		.Name = "ARL",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_CEIL,
+		.Name = "CEIL",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_CLAMP,
+		.Name = "CLAMP",
+		.NumSrcRegs = 3,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_CMP,
+		.Name = "CMP",
+		.NumSrcRegs = 3,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_CND,
+		.Name = "CND",
+		.NumSrcRegs = 3,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_COS,
+		.Name = "COS",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsStandardScalar = 1
+	},
+	{
+		.Opcode = RC_OPCODE_DDX,
+		.Name = "DDX",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_DDY,
+		.Name = "DDY",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_DP2,
+		.Name = "DP2",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_DP3,
+		.Name = "DP3",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_DP4,
+		.Name = "DP4",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_DPH,
+		.Name = "DPH",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_DST,
+		.Name = "DST",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_EX2,
+		.Name = "EX2",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsStandardScalar = 1
+	},
+	{
+		.Opcode = RC_OPCODE_EXP,
+		.Name = "EXP",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_FLR,
+		.Name = "FLR",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_FRC,
+		.Name = "FRC",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_KIL,
+		.Name = "KIL",
+		.NumSrcRegs = 1
+	},
+	{
+		.Opcode = RC_OPCODE_LG2,
+		.Name = "LG2",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsStandardScalar = 1
+	},
+	{
+		.Opcode = RC_OPCODE_LIT,
+		.Name = "LIT",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_LOG,
+		.Name = "LOG",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_LRP,
+		.Name = "LRP",
+		.NumSrcRegs = 3,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_MAD,
+		.Name = "MAD",
+		.NumSrcRegs = 3,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_MAX,
+		.Name = "MAX",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_MIN,
+		.Name = "MIN",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_MOV,
+		.Name = "MOV",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_MUL,
+		.Name = "MUL",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_POW,
+		.Name = "POW",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsStandardScalar = 1
+	},
+	{
+		.Opcode = RC_OPCODE_RCP,
+		.Name = "RCP",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsStandardScalar = 1
+	},
+	{
+		.Opcode = RC_OPCODE_RSQ,
+		.Name = "RSQ",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsStandardScalar = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SCS,
+		.Name = "SCS",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SEQ,
+		.Name = "SEQ",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SFL,
+		.Name = "SFL",
+		.NumSrcRegs = 0,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SGE,
+		.Name = "SGE",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SGT,
+		.Name = "SGT",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SIN,
+		.Name = "SIN",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsStandardScalar = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SLE,
+		.Name = "SLE",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SLT,
+		.Name = "SLT",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SNE,
+		.Name = "SNE",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SSG,
+		.Name = "SSG",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SUB,
+		.Name = "SUB",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SWZ,
+		.Name = "SWZ",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_XPD,
+		.Name = "XPD",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_TEX,
+		.Name = "TEX",
+		.HasTexture = 1,
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_TXB,
+		.Name = "TXB",
+		.HasTexture = 1,
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_TXD,
+		.Name = "TXD",
+		.HasTexture = 1,
+		.NumSrcRegs = 3,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_TXL,
+		.Name = "TXL",
+		.HasTexture = 1,
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_TXP,
+		.Name = "TXP",
+		.HasTexture = 1,
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_IF,
+		.Name = "IF",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 1
+	},
+	{
+		.Opcode = RC_OPCODE_ELSE,
+		.Name = "ELSE",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 0
+	},
+	{
+		.Opcode = RC_OPCODE_ENDIF,
+		.Name = "ENDIF",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 0
+	},
+	{
+		.Opcode = RC_OPCODE_BGNLOOP,
+		.Name = "BGNLOOP",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 0
+	},
+	{
+		.Opcode = RC_OPCODE_BRK,
+		.Name = "BRK",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 0
+	},
+	{
+		.Opcode = RC_OPCODE_ENDLOOP,
+		.Name = "ENDLOOP",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 0,
+	},
+	{
+		.Opcode = RC_OPCODE_CONT,
+		.Name = "CONT",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 0
+	},
+	{
+		.Opcode = RC_OPCODE_REPL_ALPHA,
+		.Name = "REPL_ALPHA",
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_BEGIN_TEX,
+		.Name = "BEGIN_TEX"
+	},
+	{
+		.Opcode = RC_OPCODE_KILP,
+		.Name = "KILP",
+	}
+};
+
+void rc_compute_sources_for_writemask(
+		const struct rc_instruction *inst,
+		unsigned int writemask,
+		unsigned int *srcmasks)
+{
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+	srcmasks[0] = 0;
+	srcmasks[1] = 0;
+	srcmasks[2] = 0;
+
+	if (opcode->Opcode == RC_OPCODE_KIL)
+		srcmasks[0] |= RC_MASK_XYZW;
+	else if (opcode->Opcode == RC_OPCODE_IF)
+		srcmasks[0] |= RC_MASK_X;
+
+	if (!writemask)
+		return;
+
+	if (opcode->IsComponentwise) {
+		for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src)
+			srcmasks[src] |= writemask;
+	} else if (opcode->IsStandardScalar) {
+		for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src)
+			srcmasks[src] |= RC_MASK_X;
+	} else {
+		switch(opcode->Opcode) {
+		case RC_OPCODE_ARL:
+			srcmasks[0] |= RC_MASK_X;
+			break;
+		case RC_OPCODE_DP2:
+			srcmasks[0] |= RC_MASK_XY;
+			srcmasks[1] |= RC_MASK_XY;
+			break;
+		case RC_OPCODE_DP3:
+		case RC_OPCODE_XPD:
+			srcmasks[0] |= RC_MASK_XYZ;
+			srcmasks[1] |= RC_MASK_XYZ;
+			break;
+		case RC_OPCODE_DP4:
+			srcmasks[0] |= RC_MASK_XYZW;
+			srcmasks[1] |= RC_MASK_XYZW;
+			break;
+		case RC_OPCODE_DPH:
+			srcmasks[0] |= RC_MASK_XYZ;
+			srcmasks[1] |= RC_MASK_XYZW;
+			break;
+		case RC_OPCODE_TXB:
+		case RC_OPCODE_TXP:
+		case RC_OPCODE_TXL:
+			srcmasks[0] |= RC_MASK_W;
+			/* Fall through */
+		case RC_OPCODE_TEX:
+			switch (inst->U.I.TexSrcTarget) {
+				case RC_TEXTURE_1D:
+					srcmasks[0] |= RC_MASK_X;
+					break;
+				case RC_TEXTURE_2D:
+				case RC_TEXTURE_RECT:
+				case RC_TEXTURE_1D_ARRAY:
+					srcmasks[0] |= RC_MASK_XY;
+					break;
+				case RC_TEXTURE_3D:
+				case RC_TEXTURE_CUBE:
+				case RC_TEXTURE_2D_ARRAY:
+					srcmasks[0] |= RC_MASK_XYZ;
+					break;
+			}
+			break;
+		case RC_OPCODE_TXD:
+			switch (inst->U.I.TexSrcTarget) {
+				case RC_TEXTURE_1D_ARRAY:
+					srcmasks[0] |= RC_MASK_Y;
+					/* Fall through. */
+				case RC_TEXTURE_1D:
+					srcmasks[0] |= RC_MASK_X;
+					srcmasks[1] |= RC_MASK_X;
+					srcmasks[2] |= RC_MASK_X;
+					break;
+				case RC_TEXTURE_2D_ARRAY:
+					srcmasks[0] |= RC_MASK_Z;
+					/* Fall through. */
+				case RC_TEXTURE_2D:
+				case RC_TEXTURE_RECT:
+					srcmasks[0] |= RC_MASK_XY;
+					srcmasks[1] |= RC_MASK_XY;
+					srcmasks[2] |= RC_MASK_XY;
+					break;
+				case RC_TEXTURE_3D:
+				case RC_TEXTURE_CUBE:
+					srcmasks[0] |= RC_MASK_XYZ;
+					srcmasks[1] |= RC_MASK_XYZ;
+					srcmasks[2] |= RC_MASK_XYZ;
+					break;
+			}
+			break;
+		case RC_OPCODE_DST:
+			srcmasks[0] |= RC_MASK_Y | RC_MASK_Z;
+			srcmasks[1] |= RC_MASK_Y | RC_MASK_W;
+			break;
+		case RC_OPCODE_EXP:
+		case RC_OPCODE_LOG:
+			srcmasks[0] |= RC_MASK_XY;
+			break;
+		case RC_OPCODE_LIT:
+			srcmasks[0] |= RC_MASK_X | RC_MASK_Y | RC_MASK_W;
+			break;
+		default:
+			break;
+		}
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.h b/src/gallium/drivers/r300/compiler/radeon_opcodes.h
new file mode 100644
index 00000000000..b5868820611
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.h
@@ -0,0 +1,263 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_OPCODES_H
+#define RADEON_OPCODES_H
+
+#include <assert.h>
+
+/**
+ * Opcodes understood by the Radeon compiler.
+ */
+typedef enum {
+	RC_OPCODE_NOP = 0,
+	RC_OPCODE_ILLEGAL_OPCODE,
+
+	/** vec4 instruction: dst.c = abs(src0.c); */
+	RC_OPCODE_ABS,
+
+	/** vec4 instruction: dst.c = src0.c + src1.c; */
+	RC_OPCODE_ADD,
+
+	/** special instruction: load address register
+	 * dst.x = floor(src.x), where dst must be an address register */
+	RC_OPCODE_ARL,
+
+	/** vec4 instruction: dst.c = ceil(src0.c) */
+	RC_OPCODE_CEIL,
+
+	/** vec4 instruction: dst.c = clamp(src0.c, src1.c, src2.c) */
+	RC_OPCODE_CLAMP,
+
+	/** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */
+	RC_OPCODE_CMP,
+
+	/** vec4 instruction: dst.c = src2.c > 0.5 ? src0.c : src1.c */
+	RC_OPCODE_CND,
+
+	/** scalar instruction: dst = cos(src0.x) */
+	RC_OPCODE_COS,
+
+	/** special instruction: take vec4 partial derivative in X direction
+	 * dst.c = d src0.c / dx */
+	RC_OPCODE_DDX,
+
+	/** special instruction: take vec4 partial derivative in Y direction
+	 * dst.c = d src0.c / dy */
+	RC_OPCODE_DDY,
+
+	/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */
+	RC_OPCODE_DP2,
+
+	/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */
+	RC_OPCODE_DP3,
+
+	/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */
+	RC_OPCODE_DP4,
+
+	/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w */
+	RC_OPCODE_DPH,
+
+	/** special instruction, see ARB_fragment_program */
+	RC_OPCODE_DST,
+
+	/** scalar instruction: dst = 2**src0.x */
+	RC_OPCODE_EX2,
+
+	/** special instruction, see ARB_vertex_program */
+	RC_OPCODE_EXP,
+
+	/** vec4 instruction: dst.c = floor(src0.c) */
+	RC_OPCODE_FLR,
+
+	/** vec4 instruction: dst.c = src0.c - floor(src0.c) */
+	RC_OPCODE_FRC,
+
+	/** special instruction: stop execution if any component of src0 is negative */
+	RC_OPCODE_KIL,
+
+	/** scalar instruction: dst = log_2(src0.x) */
+	RC_OPCODE_LG2,
+
+	/** special instruction, see ARB_vertex_program */
+	RC_OPCODE_LIT,
+
+	/** special instruction, see ARB_vertex_program */
+	RC_OPCODE_LOG,
+
+	/** vec4 instruction: dst.c = src0.c*src1.c + (1 - src0.c)*src2.c */
+	RC_OPCODE_LRP,
+
+	/** vec4 instruction: dst.c = src0.c*src1.c + src2.c */
+	RC_OPCODE_MAD,
+
+	/** vec4 instruction: dst.c = max(src0.c, src1.c) */
+	RC_OPCODE_MAX,
+
+	/** vec4 instruction: dst.c = min(src0.c, src1.c) */
+	RC_OPCODE_MIN,
+
+	/** vec4 instruction: dst.c = src0.c */
+	RC_OPCODE_MOV,
+
+	/** vec4 instruction: dst.c = src0.c*src1.c */
+	RC_OPCODE_MUL,
+
+	/** scalar instruction: dst = src0.x ** src1.x */
+	RC_OPCODE_POW,
+
+	/** scalar instruction: dst = 1 / src0.x */
+	RC_OPCODE_RCP,
+
+	/** scalar instruction: dst = 1 / sqrt(src0.x) */
+	RC_OPCODE_RSQ,
+
+	/** special instruction, see ARB_fragment_program */
+	RC_OPCODE_SCS,
+
+	/** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */
+	RC_OPCODE_SEQ,
+
+	/** vec4 instruction: dst.c = 0.0 */
+	RC_OPCODE_SFL,
+
+	/** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */
+	RC_OPCODE_SGE,
+
+	/** vec4 instruction: dst.c = (src0.c > src1.c) ? 1.0 : 0.0 */
+	RC_OPCODE_SGT,
+
+	/** scalar instruction: dst = sin(src0.x) */
+	RC_OPCODE_SIN,
+
+	/** vec4 instruction: dst.c = (src0.c <= src1.c) ? 1.0 : 0.0 */
+	RC_OPCODE_SLE,
+
+	/** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */
+	RC_OPCODE_SLT,
+
+	/** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */
+	RC_OPCODE_SNE,
+
+	/** vec4 instruction: dst.c = (src0.c < 0 ?) -1 : ((src0.c > 0) : 1 : 0) */
+	RC_OPCODE_SSG,
+
+	/** vec4 instruction: dst.c = src0.c - src1.c */
+	RC_OPCODE_SUB,
+
+	/** vec4 instruction: dst.c = src0.c */
+	RC_OPCODE_SWZ,
+
+	/** special instruction, see ARB_fragment_program */
+	RC_OPCODE_XPD,
+
+	RC_OPCODE_TEX,
+	RC_OPCODE_TXB,
+	RC_OPCODE_TXD,
+	RC_OPCODE_TXL,
+	RC_OPCODE_TXP,
+
+	/** branch instruction:
+	 * If src0.x != 0.0, continue with the next instruction;
+	 * otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF.
+	 */
+	RC_OPCODE_IF,
+
+	/** branch instruction: jump to matching RC_OPCODE_ENDIF */
+	RC_OPCODE_ELSE,
+
+	/** branch instruction: has no effect */
+	RC_OPCODE_ENDIF,
+	
+	RC_OPCODE_BGNLOOP,
+
+	RC_OPCODE_BRK,
+
+	RC_OPCODE_ENDLOOP,
+
+	RC_OPCODE_CONT,
+
+	/** special instruction, used in R300-R500 fragment program pair instructions
+	 * indicates that the result of the alpha operation shall be replicated
+	 * across all other channels */
+	RC_OPCODE_REPL_ALPHA,
+
+	/** special instruction, used in R300-R500 fragment programs
+	 * to indicate the start of a block of texture instructions that
+	 * can run simultaneously. */
+	RC_OPCODE_BEGIN_TEX,
+
+	/** Stop execution of the shader (GLSL discard) */
+	RC_OPCODE_KILP,
+
+	MAX_RC_OPCODE
+} rc_opcode;
+
+
+struct rc_opcode_info {
+	rc_opcode Opcode;
+	const char * Name;
+
+	/** true if the instruction reads from a texture.
+	 *
+	 * \note This is false for the KIL instruction, even though KIL is
+	 * a texture instruction from a hardware point of view. */
+	unsigned int HasTexture:1;
+
+	unsigned int NumSrcRegs:2;
+	unsigned int HasDstReg:1;
+
+	/** true if this instruction affects control flow */
+	unsigned int IsFlowControl:1;
+
+	/** true if this is a vector instruction that operates on components in parallel
+	 * without any cross-component interaction */
+	unsigned int IsComponentwise:1;
+
+	/** true if this instruction sources only its operands X components
+	 * to compute one result which is smeared across all output channels */
+	unsigned int IsStandardScalar:1;
+};
+
+extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE];
+
+static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode)
+{
+	assert((unsigned int)opcode < MAX_RC_OPCODE);
+	assert(rc_opcodes[opcode].Opcode == opcode);
+
+	return &rc_opcodes[opcode];
+}
+
+struct rc_instruction;
+
+void rc_compute_sources_for_writemask(
+		const struct rc_instruction *inst,
+		unsigned int writemask,
+		unsigned int *srcmasks);
+
+#endif /* RADEON_OPCODES_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c
new file mode 100644
index 00000000000..39dcb21d4f4
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c
@@ -0,0 +1,700 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_swizzle.h"
+
+struct src_clobbered_reads_cb_data {
+	rc_register_file File;
+	unsigned int Index;
+	unsigned int Mask;
+	struct rc_reader_data * ReaderData;
+};
+
+typedef void (*rc_presub_replace_fn)(struct rc_instruction *,
+						struct rc_instruction *,
+						unsigned int);
+
+static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
+{
+	struct rc_src_register combine;
+	combine.File = inner.File;
+	combine.Index = inner.Index;
+	combine.RelAddr = inner.RelAddr;
+	if (outer.Abs) {
+		combine.Abs = 1;
+		combine.Negate = outer.Negate;
+	} else {
+		combine.Abs = inner.Abs;
+		combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
+		combine.Negate ^= outer.Negate;
+	}
+	combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
+	return combine;
+}
+
+static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
+						struct rc_src_register * src)
+{
+	rc_register_file file = src->File;
+	struct rc_reader_data * reader_data = data;
+
+	if(!rc_inst_can_use_presub(inst,
+				reader_data->Writer->U.I.PreSub.Opcode,
+				rc_swizzle_to_writemask(src->Swizzle),
+				src,
+				&reader_data->Writer->U.I.PreSub.SrcReg[0],
+				&reader_data->Writer->U.I.PreSub.SrcReg[1])) {
+		reader_data->Abort = 1;
+		return;
+	}
+
+	/* XXX This could probably be handled better. */
+	if (file == RC_FILE_ADDRESS) {
+		reader_data->Abort = 1;
+		return;
+	}
+
+	/* These instructions cannot read from the constants file.
+	 * see radeonTransformTEX()
+	 */
+	if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
+			reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
+				(inst->U.I.Opcode == RC_OPCODE_TEX ||
+				inst->U.I.Opcode == RC_OPCODE_TXB ||
+				inst->U.I.Opcode == RC_OPCODE_TXP ||
+				inst->U.I.Opcode == RC_OPCODE_TXD ||
+				inst->U.I.Opcode == RC_OPCODE_TXL ||
+				inst->U.I.Opcode == RC_OPCODE_KIL)){
+		reader_data->Abort = 1;
+		return;
+	}
+}
+
+static void src_clobbered_reads_cb(
+	void * data,
+	struct rc_instruction * inst,
+	struct rc_src_register * src)
+{
+	struct src_clobbered_reads_cb_data * sc_data = data;
+
+	if (src->File == sc_data->File
+	    && src->Index == sc_data->Index
+	    && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
+
+		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
+	}
+
+	if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
+		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
+	}
+}
+
+static void is_src_clobbered_scan_write(
+	void * data,
+	struct rc_instruction * inst,
+	rc_register_file file,
+	unsigned int index,
+	unsigned int mask)
+{
+	struct src_clobbered_reads_cb_data sc_data;
+	struct rc_reader_data * reader_data = data;
+	sc_data.File = file;
+	sc_data.Index = index;
+	sc_data.Mask = mask;
+	sc_data.ReaderData = reader_data;
+	rc_for_all_reads_src(reader_data->Writer,
+					src_clobbered_reads_cb, &sc_data);
+}
+
+static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
+{
+	struct rc_reader_data reader_data;
+	unsigned int i;
+
+	if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
+	    inst_mov->U.I.WriteALUResult ||
+	    inst_mov->U.I.SaturateMode)
+		return;
+
+	/* Get a list of all the readers of this MOV instruction. */
+	reader_data.ExitOnAbort = 1;
+	rc_get_readers(c, inst_mov, &reader_data,
+		       copy_propagate_scan_read, NULL,
+		       is_src_clobbered_scan_write);
+
+	if (reader_data.Abort || reader_data.ReaderCount == 0)
+		return;
+
+	/* Propagate the MOV instruction. */
+	for (i = 0; i < reader_data.ReaderCount; i++) {
+		struct rc_instruction * inst = reader_data.Readers[i].Inst;
+		*reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
+
+		if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
+			inst->U.I.PreSub = inst_mov->U.I.PreSub;
+	}
+
+	/* Finally, remove the original MOV instruction */
+	rc_remove_instruction(inst_mov);
+}
+
+/**
+ * Check if a source register is actually always the same
+ * swizzle constant.
+ */
+static int is_src_uniform_constant(struct rc_src_register src,
+		rc_swizzle * pswz, unsigned int * pnegate)
+{
+	int have_used = 0;
+
+	if (src.File != RC_FILE_NONE) {
+		*pswz = 0;
+		return 0;
+	}
+
+	for(unsigned int chan = 0; chan < 4; ++chan) {
+		unsigned int swz = GET_SWZ(src.Swizzle, chan);
+		if (swz < 4) {
+			*pswz = 0;
+			return 0;
+		}
+		if (swz == RC_SWIZZLE_UNUSED)
+			continue;
+
+		if (!have_used) {
+			*pswz = swz;
+			*pnegate = GET_BIT(src.Negate, chan);
+			have_used = 1;
+		} else {
+			if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
+				*pswz = 0;
+				return 0;
+			}
+		}
+	}
+
+	return 1;
+}
+
+static void constant_folding_mad(struct rc_instruction * inst)
+{
+	rc_swizzle swz = 0;
+	unsigned int negate= 0;
+
+	if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
+		if (swz == RC_SWIZZLE_ZERO) {
+			inst->U.I.Opcode = RC_OPCODE_MUL;
+			return;
+		}
+	}
+
+	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+		if (swz == RC_SWIZZLE_ONE) {
+			inst->U.I.Opcode = RC_OPCODE_ADD;
+			if (negate)
+				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+			inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
+			return;
+		} else if (swz == RC_SWIZZLE_ZERO) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+			return;
+		}
+	}
+
+	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+		if (swz == RC_SWIZZLE_ONE) {
+			inst->U.I.Opcode = RC_OPCODE_ADD;
+			if (negate)
+				inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+			return;
+		} else if (swz == RC_SWIZZLE_ZERO) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+			return;
+		}
+	}
+}
+
+static void constant_folding_mul(struct rc_instruction * inst)
+{
+	rc_swizzle swz = 0;
+	unsigned int negate = 0;
+
+	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+		if (swz == RC_SWIZZLE_ONE) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+			if (negate)
+				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+			return;
+		} else if (swz == RC_SWIZZLE_ZERO) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+			return;
+		}
+	}
+
+	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+		if (swz == RC_SWIZZLE_ONE) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+			if (negate)
+				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+			return;
+		} else if (swz == RC_SWIZZLE_ZERO) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+			return;
+		}
+	}
+}
+
+static void constant_folding_add(struct rc_instruction * inst)
+{
+	rc_swizzle swz = 0;
+	unsigned int negate = 0;
+
+	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+		if (swz == RC_SWIZZLE_ZERO) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+			return;
+		}
+	}
+
+	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+		if (swz == RC_SWIZZLE_ZERO) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+			return;
+		}
+	}
+}
+
+/**
+ * Replace 0.0, 1.0 and 0.5 immediate constants by their
+ * respective swizzles. Simplify instructions like ADD dst, src, 0;
+ */
+static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
+{
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+	unsigned int i;
+
+	/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
+	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+		struct rc_constant * constant;
+		struct rc_src_register newsrc;
+		int have_real_reference;
+		unsigned int chan;
+
+		/* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
+		for (chan = 0; chan < 4; ++chan)
+			if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
+				break;
+		if (chan == 4) {
+			inst->U.I.SrcReg[src].File = RC_FILE_NONE;
+			continue;
+		}
+
+		/* Convert immediates to swizzles. */
+		if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
+		    inst->U.I.SrcReg[src].RelAddr ||
+		    inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
+			continue;
+
+		constant =
+			&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
+
+		if (constant->Type != RC_CONSTANT_IMMEDIATE)
+			continue;
+
+		newsrc = inst->U.I.SrcReg[src];
+		have_real_reference = 0;
+		for (chan = 0; chan < 4; ++chan) {
+			unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
+			unsigned int newswz;
+			float imm;
+			float baseimm;
+
+			if (swz >= 4)
+				continue;
+
+			imm = constant->u.Immediate[swz];
+			baseimm = imm;
+			if (imm < 0.0)
+				baseimm = -baseimm;
+
+			if (baseimm == 0.0) {
+				newswz = RC_SWIZZLE_ZERO;
+			} else if (baseimm == 1.0) {
+				newswz = RC_SWIZZLE_ONE;
+			} else if (baseimm == 0.5 && c->has_half_swizzles) {
+				newswz = RC_SWIZZLE_HALF;
+			} else {
+				have_real_reference = 1;
+				continue;
+			}
+
+			SET_SWZ(newsrc.Swizzle, chan, newswz);
+			if (imm < 0.0 && !newsrc.Abs)
+				newsrc.Negate ^= 1 << chan;
+		}
+
+		if (!have_real_reference) {
+			newsrc.File = RC_FILE_NONE;
+			newsrc.Index = 0;
+		}
+
+		/* don't make the swizzle worse */
+		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
+		    c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
+			continue;
+
+		inst->U.I.SrcReg[src] = newsrc;
+	}
+
+	/* Simplify instructions based on constants */
+	if (inst->U.I.Opcode == RC_OPCODE_MAD)
+		constant_folding_mad(inst);
+
+	/* note: MAD can simplify to MUL or ADD */
+	if (inst->U.I.Opcode == RC_OPCODE_MUL)
+		constant_folding_mul(inst);
+	else if (inst->U.I.Opcode == RC_OPCODE_ADD)
+		constant_folding_add(inst);
+
+	/* In case this instruction has been converted, make sure all of the
+	 * registers that are no longer used are empty. */
+	opcode = rc_get_opcode_info(inst->U.I.Opcode);
+	for(i = opcode->NumSrcRegs; i < 3; i++) {
+		memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
+	}
+}
+
+/**
+ * If src and dst use the same register, this function returns a writemask that
+ * indicates wich components are read by src.  Otherwise zero is returned.
+ */
+static unsigned int src_reads_dst_mask(struct rc_src_register src,
+						struct rc_dst_register dst)
+{
+	if (dst.File != src.File || dst.Index != src.Index) {
+		return 0;
+	}
+	return rc_swizzle_to_writemask(src.Swizzle);
+}
+
+/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
+ * in any of its channels.  Return 0 otherwise. */
+static int src_has_const_swz(struct rc_src_register src) {
+	int chan;
+	for(chan = 0; chan < 4; chan++) {
+		unsigned int swz = GET_SWZ(src.Swizzle, chan);
+		if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
+						|| swz == RC_SWIZZLE_ONE) {
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static void presub_scan_read(
+	void * data,
+	struct rc_instruction * inst,
+	struct rc_src_register * src)
+{
+	struct rc_reader_data * reader_data = data;
+	rc_presubtract_op * presub_opcode = reader_data->CbData;
+
+	if (!rc_inst_can_use_presub(inst, *presub_opcode,
+			reader_data->Writer->U.I.DstReg.WriteMask,
+			src,
+			&reader_data->Writer->U.I.SrcReg[0],
+			&reader_data->Writer->U.I.SrcReg[1])) {
+		reader_data->Abort = 1;
+		return;
+	}
+}
+
+static int presub_helper(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst_add,
+	rc_presubtract_op presub_opcode,
+	rc_presub_replace_fn presub_replace)
+{
+	struct rc_reader_data reader_data;
+	unsigned int i;
+	rc_presubtract_op cb_op = presub_opcode;
+
+	reader_data.CbData = &cb_op;
+	reader_data.ExitOnAbort = 1;
+	rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
+						is_src_clobbered_scan_write);
+
+	if (reader_data.Abort || reader_data.ReaderCount == 0)
+		return 0;
+
+	for(i = 0; i < reader_data.ReaderCount; i++) {
+		unsigned int src_index;
+		struct rc_reader reader = reader_data.Readers[i];
+		const struct rc_opcode_info * info =
+				rc_get_opcode_info(reader.Inst->U.I.Opcode);
+
+		for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
+			if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src)
+				presub_replace(inst_add, reader.Inst, src_index);
+		}
+	}
+	return 1;
+}
+
+/* This function assumes that inst_add->U.I.SrcReg[0] and
+ * inst_add->U.I.SrcReg[1] aren't both negative. */
+static void presub_replace_add(
+	struct rc_instruction * inst_add,
+	struct rc_instruction * inst_reader,
+	unsigned int src_index)
+{
+	rc_presubtract_op presub_opcode;
+	if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
+		presub_opcode = RC_PRESUB_SUB;
+	else
+		presub_opcode = RC_PRESUB_ADD;
+
+	if (inst_add->U.I.SrcReg[1].Negate) {
+		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
+		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
+	} else {
+		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
+		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
+	}
+	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
+	inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
+	inst_reader->U.I.PreSub.Opcode = presub_opcode;
+	inst_reader->U.I.SrcReg[src_index] =
+			chain_srcregs(inst_reader->U.I.SrcReg[src_index],
+					inst_reader->U.I.PreSub.SrcReg[0]);
+	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
+	inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
+}
+
+static int is_presub_candidate(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst)
+{
+	const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
+	unsigned int i;
+	unsigned int is_constant[2] = {0, 0};
+
+	assert(inst->U.I.Opcode == RC_OPCODE_ADD);
+
+	if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
+			|| inst->U.I.SaturateMode
+			|| inst->U.I.WriteALUResult) {
+		return 0;
+	}
+
+	/* If both sources use a constant swizzle, then we can't convert it to
+	 * a presubtract operation.  In fact for the ADD and SUB presubtract
+	 * operations neither source can contain a constant swizzle.  This
+	 * specific case is checked in peephole_add_presub_add() when
+	 * we make sure the swizzles for both sources are equal, so we
+	 * don't need to worry about it here. */
+	for (i = 0; i < 2; i++) {
+		int chan;
+		for (chan = 0; chan < 4; chan++) {
+			rc_swizzle swz =
+				get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
+			if (swz == RC_SWIZZLE_ONE
+					|| swz == RC_SWIZZLE_ZERO
+					|| swz == RC_SWIZZLE_HALF) {
+				is_constant[i] = 1;
+			}
+		}
+	}
+	if (is_constant[0] && is_constant[1])
+		return 0;
+
+	for(i = 0; i < info->NumSrcRegs; i++) {
+		struct rc_src_register src = inst->U.I.SrcReg[i];
+		if (src_reads_dst_mask(src, inst->U.I.DstReg))
+			return 0;
+
+		src.File = RC_FILE_PRESUB;
+		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
+			return 0;
+	}
+	return 1;
+}
+
+static int peephole_add_presub_add(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst_add)
+{
+	unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
+        unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
+        unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
+
+	if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
+		return 0;
+
+	/* src0 and src1 can't have absolute values */
+	if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
+	        return 0;
+
+	/* presub_replace_add() assumes only one is negative */
+	if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
+	        return 0;
+
+        /* if src0 is negative, at least all bits of dstmask have to be set */
+        if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
+	        return 0;
+
+        /* if src1 is negative, at least all bits of dstmask have to be set */
+        if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
+	        return 0;
+
+	if (!is_presub_candidate(c, inst_add))
+		return 0;
+
+	if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
+		rc_remove_instruction(inst_add);
+		return 1;
+	}
+	return 0;
+}
+
+static void presub_replace_inv(
+	struct rc_instruction * inst_add,
+	struct rc_instruction * inst_reader,
+	unsigned int src_index)
+{
+	/* We must be careful not to modify inst_add, since it
+	 * is possible it will remain part of the program.*/
+	inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
+	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
+	inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
+	inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
+						inst_reader->U.I.PreSub.SrcReg[0]);
+
+	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
+	inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
+}
+
+/**
+ * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
+ * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source
+ * of the add instruction must have the constatnt 1 swizzle.  This function
+ * does not check const registers to see if their value is 1.0, so it should
+ * be called after the constant_folding optimization.
+ * @return
+ * 	0 if the ADD instruction is still part of the program.
+ * 	1 if the ADD instruction is no longer part of the program.
+ */
+static int peephole_add_presub_inv(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst_add)
+{
+	unsigned int i, swz;
+
+	if (!is_presub_candidate(c, inst_add))
+		return 0;
+
+	/* Check if src0 is 1. */
+	/* XXX It would be nice to use is_src_uniform_constant here, but that
+	 * function only works if the register's file is RC_FILE_NONE */
+	for(i = 0; i < 4; i++ ) {
+		swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
+		if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
+						&& swz != RC_SWIZZLE_ONE) {
+			return 0;
+		}
+	}
+
+	/* Check src1. */
+	if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
+						inst_add->U.I.DstReg.WriteMask
+		|| inst_add->U.I.SrcReg[1].Abs
+		|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
+			&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
+		|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {
+
+		return 0;
+	}
+
+	if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
+		rc_remove_instruction(inst_add);
+		return 1;
+	}
+	return 0;
+}
+
+/**
+ * @return
+ * 	0 if inst is still part of the program.
+ * 	1 if inst is no longer part of the program.
+ */
+static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
+{
+	switch(inst->U.I.Opcode){
+	case RC_OPCODE_ADD:
+		if (c->has_presub) {
+			if(peephole_add_presub_inv(c, inst))
+				return 1;
+			if(peephole_add_presub_add(c, inst))
+				return 1;
+		}
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+void rc_optimize(struct radeon_compiler * c, void *user)
+{
+	struct rc_instruction * inst = c->Program.Instructions.Next;
+	while(inst != &c->Program.Instructions) {
+		struct rc_instruction * cur = inst;
+		inst = inst->Next;
+
+		constant_folding(c, cur);
+
+		if(peephole(c, cur))
+			continue;
+
+		if (cur->U.I.Opcode == RC_OPCODE_MOV) {
+			copy_propagate(c, cur);
+			/* cur may no longer be part of the program */
+		}
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c b/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c
new file mode 100644
index 00000000000..1e9a2c09d44
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_pair_dead_sources.c
@@ -0,0 +1,62 @@
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_opcodes.h"
+#include "radeon_program_pair.h"
+
+static void mark_used_presub(struct rc_pair_sub_instruction * sub)
+{
+	if (sub->Src[RC_PAIR_PRESUB_SRC].Used) {
+		unsigned int presub_reg_count = rc_presubtract_src_reg_count(
+					sub->Src[RC_PAIR_PRESUB_SRC].Index);
+		unsigned int i;
+		for (i = 0; i < presub_reg_count; i++) {
+			sub->Src[i].Used = 1;
+		}
+	}
+}
+
+static void mark_used(
+	struct rc_instruction * inst,
+	struct rc_pair_sub_instruction * sub)
+{
+	unsigned int i;
+	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+	for (i = 0; i < info->NumSrcRegs; i++) {
+		unsigned int src_type = rc_source_type_swz(sub->Arg[i].Swizzle);
+		if (src_type & RC_SOURCE_RGB) {
+			inst->U.P.RGB.Src[sub->Arg[i].Source].Used = 1;
+		}
+
+		if (src_type & RC_SOURCE_ALPHA) {
+			inst->U.P.Alpha.Src[sub->Arg[i].Source].Used = 1;
+		}
+	}
+}
+
+/**
+ * This pass finds sources that are not used by their instruction and marks
+ * them as unused. 
+ */
+void rc_pair_remove_dead_sources(struct radeon_compiler * c, void *user)
+{
+	struct rc_instruction * inst;
+	for (inst = c->Program.Instructions.Next;
+					inst != &c->Program.Instructions;
+					inst = inst->Next) {
+		unsigned int i;
+		if (inst->Type == RC_INSTRUCTION_NORMAL)
+			continue;
+
+		/* Mark all sources as unused */
+		for (i = 0; i < 4; i++) {
+			inst->U.P.RGB.Src[i].Used = 0;
+			inst->U.P.Alpha.Src[i].Used = 0;
+		}
+		mark_used(inst, &inst->U.P.RGB);
+		mark_used(inst, &inst->U.P.Alpha);
+
+		mark_used_presub(&inst->U.P.RGB);
+		mark_used_presub(&inst->U.P.Alpha);
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c
new file mode 100644
index 00000000000..49983d6ce75
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c
@@ -0,0 +1,706 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include <stdio.h>
+
+#include "main/glheader.h"
+#include "program/register_allocate.h"
+#include "ralloc.h"
+
+#include "r300_fragprog_swizzle.h"
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+#include "radeon_list.h"
+#include "radeon_variable.h"
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+
+
+struct register_info {
+	struct live_intervals Live[4];
+
+	unsigned int Used:1;
+	unsigned int Allocated:1;
+	unsigned int File:3;
+	unsigned int Index:RC_REGISTER_INDEX_BITS;
+	unsigned int Writemask;
+};
+
+struct regalloc_state {
+	struct radeon_compiler * C;
+
+	struct register_info * Input;
+	unsigned int NumInputs;
+
+	struct register_info * Temporary;
+	unsigned int NumTemporaries;
+
+	unsigned int Simple;
+	int LoopEnd;
+};
+
+enum rc_reg_class {
+	RC_REG_CLASS_SINGLE,
+	RC_REG_CLASS_DOUBLE,
+	RC_REG_CLASS_TRIPLE,
+	RC_REG_CLASS_ALPHA,
+	RC_REG_CLASS_SINGLE_PLUS_ALPHA,
+	RC_REG_CLASS_DOUBLE_PLUS_ALPHA,
+	RC_REG_CLASS_TRIPLE_PLUS_ALPHA,
+	RC_REG_CLASS_X,
+	RC_REG_CLASS_Y,
+	RC_REG_CLASS_Z,
+	RC_REG_CLASS_XY,
+	RC_REG_CLASS_YZ,
+	RC_REG_CLASS_XZ,
+	RC_REG_CLASS_XW,
+	RC_REG_CLASS_YW,
+	RC_REG_CLASS_ZW,
+	RC_REG_CLASS_XYW,
+	RC_REG_CLASS_YZW,
+	RC_REG_CLASS_XZW,
+	RC_REG_CLASS_COUNT
+};
+
+struct rc_class {
+	enum rc_reg_class Class;
+
+	unsigned int WritemaskCount;
+
+	/** This is 1 if this class is being used by the register allocator
+	 * and 0 otherwise */
+	unsigned int Used;
+
+	/** This is the ID number assigned to this class by ra. */
+	unsigned int Id;
+
+	/** List of writemasks that belong to this class */
+	unsigned int Writemasks[3];
+
+
+};
+
+static void print_live_intervals(struct live_intervals * src)
+{
+	if (!src || !src->Used) {
+		DBG("(null)");
+		return;
+	}
+
+	DBG("(%i,%i)", src->Start, src->End);
+}
+
+static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
+{
+	if (VERBOSE) {
+		DBG("overlap_live_intervals: ");
+		print_live_intervals(a);
+		DBG(" to ");
+		print_live_intervals(b);
+		DBG("\n");
+	}
+
+	if (!a->Used || !b->Used) {
+		DBG("    unused interval\n");
+		return 0;
+	}
+
+	if (a->Start > b->Start) {
+		if (a->Start < b->End) {
+			DBG("    overlap\n");
+			return 1;
+		}
+	} else if (b->Start > a->Start) {
+		if (b->Start < a->End) {
+			DBG("    overlap\n");
+			return 1;
+		}
+	} else { /* a->Start == b->Start */
+		if (a->Start != a->End && b->Start != b->End) {
+			DBG("    overlap\n");
+			return 1;
+		}
+	}
+
+	DBG("    no overlap\n");
+
+	return 0;
+}
+
+static void scan_read_callback(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int mask)
+{
+	struct regalloc_state * s = data;
+	struct register_info * reg;
+	unsigned int i;
+
+	if (file != RC_FILE_INPUT)
+		return;
+
+	s->Input[index].Used = 1;
+	reg = &s->Input[index];
+
+	for (i = 0; i < 4; i++) {
+		if (!((mask >> i) & 0x1)) {
+			continue;
+		}
+		reg->Live[i].Used = 1;
+		reg->Live[i].Start = 0;
+		reg->Live[i].End =
+			s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP;
+	}
+}
+
+static void remap_register(void * data, struct rc_instruction * inst,
+		rc_register_file * file, unsigned int * index)
+{
+	struct regalloc_state * s = data;
+	const struct register_info * reg;
+
+	if (*file == RC_FILE_TEMPORARY && s->Simple)
+		reg = &s->Temporary[*index];
+	else if (*file == RC_FILE_INPUT)
+		reg = &s->Input[*index];
+	else
+		return;
+
+	if (reg->Allocated) {
+		*index = reg->Index;
+	}
+}
+
+static void alloc_input_simple(void * data, unsigned int input,
+							unsigned int hwreg)
+{
+	struct regalloc_state * s = data;
+
+	if (input >= s->NumInputs)
+		return;
+
+	s->Input[input].Allocated = 1;
+	s->Input[input].File = RC_FILE_TEMPORARY;
+	s->Input[input].Index = hwreg;
+}
+
+/* This functions offsets the temporary register indices by the number
+ * of input registers, because input registers are actually temporaries and
+ * should not occupy the same space.
+ *
+ * This pass is supposed to be used to maintain correct allocation of inputs
+ * if the standard register allocation is disabled. */
+static void do_regalloc_inputs_only(struct regalloc_state * s)
+{
+	for (unsigned i = 0; i < s->NumTemporaries; i++) {
+		s->Temporary[i].Allocated = 1;
+		s->Temporary[i].File = RC_FILE_TEMPORARY;
+		s->Temporary[i].Index = i + s->NumInputs;
+	}
+}
+
+static unsigned int is_derivative(rc_opcode op)
+{
+	return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY);
+}
+
+static int find_class(
+	struct rc_class * classes,
+	unsigned int writemask,
+	unsigned int max_writemask_count)
+{
+	unsigned int i;
+	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
+		unsigned int j;
+		if (classes[i].WritemaskCount > max_writemask_count) {
+			continue;
+		}
+		for (j = 0; j < 3; j++) {
+			if (classes[i].Writemasks[j] == writemask) {
+				return i;
+			}
+		}
+	}
+	return -1;
+}
+
+static enum rc_reg_class variable_get_class(
+	struct rc_variable * variable,
+	struct rc_class * classes)
+{
+	unsigned int i;
+	unsigned int can_change_writemask= 1;
+	unsigned int writemask = rc_variable_writemask_sum(variable);
+	struct rc_list * readers = rc_variable_readers_union(variable);
+	int class_index;
+
+	if (!variable->C->is_r500) {
+		struct rc_class c;
+		/* The assumption here is that if an instruction has type
+		 * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
+		 * r300 and r400 can't swizzle the result of a TEX lookup. */
+		if (variable->Inst->Type == RC_INSTRUCTION_NORMAL) {
+			writemask = RC_MASK_XYZW;
+		}
+
+		/* Check if it is possible to do swizzle packing for r300/r400
+		 * without creating non-native swizzles. */
+		class_index = find_class(classes, writemask, 3);
+		if (class_index < 0) {
+			goto error;
+		}
+		c = classes[class_index];
+		for (i = 0; i < c.WritemaskCount; i++) {
+			int j;
+			unsigned int conversion_swizzle =
+						rc_make_conversion_swizzle(
+						writemask, c.Writemasks[i]);
+			for (j = 0; j < variable->ReaderCount; j++) {
+				unsigned int old_swizzle;
+				unsigned int new_swizzle;
+				struct rc_reader r = variable->Readers[j];
+				if (r.Inst->Type == RC_INSTRUCTION_PAIR ) {
+					old_swizzle = r.U.P.Arg->Swizzle;
+				} else {
+					old_swizzle = r.U.I.Src->Swizzle;
+				}
+				new_swizzle = rc_adjust_channels(
+					old_swizzle, conversion_swizzle);
+				if (!r300_swizzle_is_native_basic(new_swizzle)) {
+					can_change_writemask = 0;
+					break;
+				}
+			}
+			if (!can_change_writemask) {
+				break;
+			}
+		}
+	}
+
+	if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
+		/* DDX/DDY seem to always fail when their writemasks are
+		 * changed.*/
+		if (is_derivative(variable->Inst->U.P.RGB.Opcode)
+		    || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
+			can_change_writemask = 0;
+		}
+	}
+	for ( ; readers; readers = readers->Next) {
+		struct rc_reader * r = readers->Item;
+		if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
+			if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
+				can_change_writemask = 0;
+				break;
+			}
+			/* DDX/DDY also fail when their swizzles are changed. */
+			if (is_derivative(r->Inst->U.P.RGB.Opcode)
+			    || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
+				can_change_writemask = 0;
+				break;
+			}
+		}
+	}
+
+	class_index = find_class(classes, writemask,
+						can_change_writemask ? 3 : 1);
+	if (class_index > -1) {
+		return classes[class_index].Class;
+	} else {
+error:
+		rc_error(variable->C,
+				"Could not find class for index=%u mask=%u\n",
+				variable->Dst.Index, writemask);
+		return 0;
+	}
+}
+
+static unsigned int overlap_live_intervals_array(
+	struct live_intervals * a,
+	struct live_intervals * b)
+{
+	unsigned int a_chan, b_chan;
+	for (a_chan = 0; a_chan < 4; a_chan++) {
+		for (b_chan = 0; b_chan < 4; b_chan++) {
+			if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
+					return 1;
+			}
+		}
+	}
+	return 0;
+}
+
+static unsigned int reg_get_index(int reg)
+{
+	return reg / RC_MASK_XYZW;
+}
+
+static unsigned int reg_get_writemask(int reg)
+{
+	return (reg % RC_MASK_XYZW) + 1;
+}
+
+static int get_reg_id(unsigned int index, unsigned int writemask)
+{
+	assert(writemask);
+	if (writemask == 0) {
+		return 0;
+	}
+	return (index * RC_MASK_XYZW) + (writemask - 1);
+}
+
+#if VERBOSE
+static void print_reg(int reg)
+{
+	unsigned int index = reg_get_index(reg);
+	unsigned int mask = reg_get_writemask(reg);
+	fprintf(stderr, "Temp[%u].%c%c%c%c", index,
+		mask & RC_MASK_X ? 'x' : '_',
+		mask & RC_MASK_Y ? 'y' : '_',
+		mask & RC_MASK_Z ? 'z' : '_',
+		mask & RC_MASK_W ? 'w' : '_');
+}
+#endif
+
+static void add_register_conflicts(
+	struct ra_regs * regs,
+	unsigned int max_temp_regs)
+{
+	unsigned int index, a_mask, b_mask;
+	for (index = 0; index < max_temp_regs; index++) {
+		for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
+			for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
+								b_mask++) {
+				if (a_mask & b_mask) {
+					ra_add_reg_conflict(regs,
+						get_reg_id(index, a_mask),
+						get_reg_id(index, b_mask));
+				}
+			}
+		}
+	}
+}
+
+static void do_advanced_regalloc(struct regalloc_state * s)
+{
+	struct rc_class rc_class_list [] = {
+		{RC_REG_CLASS_SINGLE, 3, 0, 0,
+			{RC_MASK_X,
+			 RC_MASK_Y,
+			 RC_MASK_Z}},
+		{RC_REG_CLASS_DOUBLE, 3, 0, 0,
+			{RC_MASK_X | RC_MASK_Y,
+			 RC_MASK_X | RC_MASK_Z,
+			 RC_MASK_Y | RC_MASK_Z}},
+		{RC_REG_CLASS_TRIPLE, 1, 0, 0,
+			{RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
+			 RC_MASK_NONE,
+			 RC_MASK_NONE}},
+		{RC_REG_CLASS_ALPHA, 1, 0, 0,
+			{RC_MASK_W,
+			 RC_MASK_NONE,
+			 RC_MASK_NONE}},
+		{RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0,
+			{RC_MASK_X | RC_MASK_W,
+			 RC_MASK_Y | RC_MASK_W,
+			 RC_MASK_Z | RC_MASK_W}},
+		{RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0,
+			{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
+			 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
+			 RC_MASK_Y | RC_MASK_Z | RC_MASK_W}},
+		{RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0,
+			{RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_X, 1, 0, 0,
+			{RC_MASK_X,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_Y, 1, 0, 0,
+			{RC_MASK_Y,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_Z, 1, 0, 0,
+			{RC_MASK_Z,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_XY, 1, 0, 0,
+			{RC_MASK_X | RC_MASK_Y,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_YZ, 1, 0, 0,
+			{RC_MASK_Y | RC_MASK_Z,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_XZ, 1, 0, 0,
+			{RC_MASK_X | RC_MASK_Z,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_XW, 1, 0, 0,
+			{RC_MASK_X | RC_MASK_W,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_YW, 1, 0, 0,
+			{RC_MASK_Y | RC_MASK_W,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_ZW, 1, 0, 0,
+			{RC_MASK_Z | RC_MASK_W,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_XYW, 1, 0, 0,
+			{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_YZW, 1, 0, 0,
+			{RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
+			RC_MASK_NONE,
+			RC_MASK_NONE}},
+		{RC_REG_CLASS_XZW, 1, 0, 0,
+			{RC_MASK_X | RC_MASK_Z | RC_MASK_W,
+			RC_MASK_NONE,
+			RC_MASK_NONE}}
+	};
+
+	unsigned int i, j, index, input_node, node_count, node_index;
+	unsigned int * node_classes;
+	unsigned int * input_classes;
+	struct rc_instruction * inst;
+	struct rc_list * var_ptr;
+	struct rc_list * variables;
+	struct ra_regs * regs;
+	struct ra_graph * graph;
+
+	/* Allocate the main ra data structure */
+	regs = ra_alloc_reg_set(s->C->max_temp_regs * RC_MASK_XYZW);
+
+	/* Get list of program variables */
+	variables = rc_get_variables(s->C);
+	node_count = rc_list_count(variables);
+	node_classes = memory_pool_malloc(&s->C->Pool,
+			node_count * sizeof(unsigned int));
+	input_classes = memory_pool_malloc(&s->C->Pool,
+			s->NumInputs * sizeof(unsigned int));
+
+	for (var_ptr = variables, node_index = 0; var_ptr;
+					var_ptr = var_ptr->Next, node_index++) {
+		unsigned int class_index;
+		/* Compute the live intervals */
+		rc_variable_compute_live_intervals(var_ptr->Item);
+
+		class_index = variable_get_class(var_ptr->Item,	rc_class_list);
+
+		/* If we haven't used this register class yet, mark it
+		 * as used and allocate space for it. */
+		if (!rc_class_list[class_index].Used) {
+			rc_class_list[class_index].Used = 1;
+			rc_class_list[class_index].Id = ra_alloc_reg_class(regs);
+		}
+
+		node_classes[node_index] = rc_class_list[class_index].Id;
+	}
+
+
+	/* Assign registers to the classes */
+	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
+		struct rc_class class = rc_class_list[i];
+		if (!class.Used) {
+			continue;
+		}
+
+		for (index = 0; index < s->C->max_temp_regs; index++) {
+			for (j = 0; j < class.WritemaskCount; j++) {
+				int reg_id = get_reg_id(index,
+							class.Writemasks[j]);
+				ra_class_add_reg(regs, class.Id, reg_id);
+			}
+		}
+	}
+
+	/* Add register conflicts */
+	add_register_conflicts(regs, s->C->max_temp_regs);
+
+	/* Calculate live intervals for input registers */
+	for (inst = s->C->Program.Instructions.Next;
+					inst != &s->C->Program.Instructions;
+					inst = inst->Next) {
+		rc_opcode op = rc_get_flow_control_inst(inst);
+		if (op == RC_OPCODE_BGNLOOP) {
+			struct rc_instruction * endloop =
+							rc_match_bgnloop(inst);
+			if (endloop->IP > s->LoopEnd) {
+				s->LoopEnd = endloop->IP;
+			}
+		}
+		rc_for_all_reads_mask(inst, scan_read_callback, s);
+	}
+
+	/* Create classes for input registers */
+	for (i = 0; i < s->NumInputs; i++) {
+		unsigned int chan, class_id, writemask = 0;
+		for (chan = 0; chan < 4; chan++) {
+			if (s->Input[i].Live[chan].Used) {
+				writemask |= (1 << chan);
+			}
+		}
+		s->Input[i].Writemask = writemask;
+		if (!writemask) {
+			continue;
+		}
+
+		class_id = ra_alloc_reg_class(regs);
+		input_classes[i] = class_id;
+		ra_class_add_reg(regs, class_id,
+				get_reg_id(s->Input[i].Index, writemask));
+	}
+
+	ra_set_finalize(regs);
+
+	graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs);
+
+	/* Build the interference graph */
+	for (var_ptr = variables, node_index = 0; var_ptr;
+					var_ptr = var_ptr->Next,node_index++) {
+		struct rc_list * a, * b;
+		unsigned int b_index;
+
+		ra_set_node_class(graph, node_index, node_classes[node_index]);
+
+		for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
+						b; b = b->Next, b_index++) {
+			struct rc_variable * var_a = a->Item;
+			while (var_a) {
+				struct rc_variable * var_b = b->Item;
+				while (var_b) {
+					if (overlap_live_intervals_array(var_a->Live, var_b->Live)) {
+						ra_add_node_interference(graph,
+							node_index, b_index);
+					}
+					var_b = var_b->Friend;
+				}
+				var_a = var_a->Friend;
+			}
+		}
+	}
+
+	/* Add input registers to the interference graph */
+	for (i = 0, input_node = 0; i< s->NumInputs; i++) {
+		if (!s->Input[i].Writemask) {
+			continue;
+		}
+		ra_set_node_class(graph, node_count + input_node,
+							input_classes[i]);
+		for (var_ptr = variables, node_index = 0;
+				var_ptr; var_ptr = var_ptr->Next, node_index++) {
+			struct rc_variable * var = var_ptr->Item;
+			if (overlap_live_intervals_array(s->Input[i].Live,
+								var->Live)) {
+				ra_add_node_interference(graph, node_index,
+						node_count + input_node);
+			}
+		}
+		/* Manually allocate a register for this input */
+		ra_set_node_reg(graph, node_count + input_node, get_reg_id(
+				s->Input[i].Index, s->Input[i].Writemask));
+		input_node++;
+	}
+
+	if (!ra_allocate_no_spills(graph)) {
+		rc_error(s->C, "Ran out of hardware temporaries\n");
+		return;
+	}
+
+	/* Rewrite the registers */
+	for (var_ptr = variables, node_index = 0; var_ptr;
+				var_ptr = var_ptr->Next, node_index++) {
+		int reg = ra_get_node_reg(graph, node_index);
+		unsigned int writemask = reg_get_writemask(reg);
+		unsigned int index = reg_get_index(reg);
+		struct rc_variable * var = var_ptr->Item;
+
+		if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
+			writemask = rc_variable_writemask_sum(var);
+		}
+
+		if (var->Dst.File == RC_FILE_INPUT) {
+			continue;
+		}
+		rc_variable_change_dst(var, index, writemask);
+	}
+
+	ralloc_free(graph);
+	ralloc_free(regs);
+}
+
+/**
+ * @param user This parameter should be a pointer to an integer value.  If this
+ * integer value is zero, then a simple register allocator will be used that
+ * only allocates space for input registers (\sa do_regalloc_inputs_only).  If
+ * user is non-zero, then the regular register allocator will be used
+ * (\sa do_regalloc).
+  */
+void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
+{
+	struct r300_fragment_program_compiler *c =
+				(struct r300_fragment_program_compiler*)cc;
+	struct regalloc_state s;
+	int * do_full_regalloc = (int*)user;
+
+	memset(&s, 0, sizeof(s));
+	s.C = cc;
+	s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
+	s.Input = memory_pool_malloc(&cc->Pool,
+			s.NumInputs * sizeof(struct register_info));
+	memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
+
+	s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
+	s.Temporary = memory_pool_malloc(&cc->Pool,
+			s.NumTemporaries * sizeof(struct register_info));
+	memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
+
+	rc_recompute_ips(s.C);
+
+	c->AllocateHwInputs(c, &alloc_input_simple, &s);
+	if (*do_full_regalloc) {
+		do_advanced_regalloc(&s);
+	} else {
+		s.Simple = 1;
+		do_regalloc_inputs_only(&s);
+	}
+
+	/* Rewrite inputs and if we are doing the simple allocation, rewrite
+	 * temporaries too. */
+	for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
+					inst != &s.C->Program.Instructions;
+					inst = inst->Next) {
+		rc_remap_registers(inst, &remap_register, &s);
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c
new file mode 100644
index 00000000000..25cd52c9cd4
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c
@@ -0,0 +1,1010 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+struct schedule_instruction {
+	struct rc_instruction * Instruction;
+
+	/** Next instruction in the linked list of ready instructions. */
+	struct schedule_instruction *NextReady;
+
+	/** Values that this instruction reads and writes */
+	struct reg_value * WriteValues[4];
+	struct reg_value * ReadValues[12];
+	unsigned int NumWriteValues:3;
+	unsigned int NumReadValues:4;
+
+	/**
+	 * Number of (read and write) dependencies that must be resolved before
+	 * this instruction can be scheduled.
+	 */
+	unsigned int NumDependencies:5;
+
+	/** List of all readers (see rc_get_readers() for the definition of
+	 * "all readers"), even those outside the basic block this instruction
+	 * lives in. */
+	struct rc_reader_data GlobalReaders;
+};
+
+
+/**
+ * Used to keep track of which instructions read a value.
+ */
+struct reg_value_reader {
+	struct schedule_instruction *Reader;
+	struct reg_value_reader *Next;
+};
+
+/**
+ * Used to keep track which values are stored in each component of a
+ * RC_FILE_TEMPORARY.
+ */
+struct reg_value {
+	struct schedule_instruction * Writer;
+
+	/**
+	 * Unordered linked list of instructions that read from this value.
+	 * When this value becomes available, we increase all readers'
+	 * dependency count.
+	 */
+	struct reg_value_reader *Readers;
+
+	/**
+	 * Number of readers of this value. This is decremented each time
+	 * a reader of the value is committed.
+	 * When the reader cound reaches zero, the dependency count
+	 * of the instruction writing \ref Next is decremented.
+	 */
+	unsigned int NumReaders;
+
+	struct reg_value *Next; /**< Pointer to the next value to be written to the same register */
+};
+
+struct register_state {
+	struct reg_value * Values[4];
+};
+
+struct remap_reg {
+	struct rc_instruciont * Inst;
+	unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
+	unsigned int OldSwizzle:3;
+	unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
+	unsigned int NewSwizzle:3;
+	unsigned int OnlyTexReads:1;
+	struct remap_reg * Next;
+};
+
+struct schedule_state {
+	struct radeon_compiler * C;
+	struct schedule_instruction * Current;
+
+	struct register_state Temporary[RC_REGISTER_MAX_INDEX];
+
+	/**
+	 * Linked lists of instructions that can be scheduled right now,
+	 * based on which ALU/TEX resources they require.
+	 */
+	/*@{*/
+	struct schedule_instruction *ReadyFullALU;
+	struct schedule_instruction *ReadyRGB;
+	struct schedule_instruction *ReadyAlpha;
+	struct schedule_instruction *ReadyTEX;
+	/*@}*/
+};
+
+static struct reg_value ** get_reg_valuep(struct schedule_state * s,
+		rc_register_file file, unsigned int index, unsigned int chan)
+{
+	if (file != RC_FILE_TEMPORARY)
+		return 0;
+
+	if (index >= RC_REGISTER_MAX_INDEX) {
+		rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index);
+		return 0;
+	}
+
+	return &s->Temporary[index].Values[chan];
+}
+
+static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
+{
+	inst->NextReady = *list;
+	*list = inst;
+}
+
+static void add_inst_to_list_end(struct schedule_instruction ** list,
+					struct schedule_instruction * inst)
+{
+	if(!*list){
+		*list = inst;
+	}else{
+		struct schedule_instruction * temp = *list;
+		while(temp->NextReady){
+			temp = temp->NextReady;
+		}
+		temp->NextReady = inst;
+	}
+}
+
+static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+	DBG("%i is now ready\n", sinst->Instruction->IP);
+
+	/* Adding Ready TEX instructions to the end of the "Ready List" helps
+	 * us emit TEX instructions in blocks without losing our place. */
+	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
+		add_inst_to_list_end(&s->ReadyTEX, sinst);
+	else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
+		add_inst_to_list(&s->ReadyRGB, sinst);
+	else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
+		add_inst_to_list(&s->ReadyAlpha, sinst);
+	else
+		add_inst_to_list(&s->ReadyFullALU, sinst);
+}
+
+static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+	assert(sinst->NumDependencies > 0);
+	sinst->NumDependencies--;
+	if (!sinst->NumDependencies)
+		instruction_ready(s, sinst);
+}
+
+/**
+ * This function decreases the dependencies of the next instruction that
+ * wants to write to each of sinst's read values.
+ */
+static void commit_update_reads(struct schedule_state * s,
+					struct schedule_instruction * sinst){
+	unsigned int i;
+	for(i = 0; i < sinst->NumReadValues; ++i) {
+		struct reg_value * v = sinst->ReadValues[i];
+		assert(v->NumReaders > 0);
+		v->NumReaders--;
+		if (!v->NumReaders) {
+			if (v->Next)
+				decrease_dependencies(s, v->Next->Writer);
+		}
+	}
+}
+
+static void commit_update_writes(struct schedule_state * s,
+					struct schedule_instruction * sinst){
+	unsigned int i;
+	for(i = 0; i < sinst->NumWriteValues; ++i) {
+		struct reg_value * v = sinst->WriteValues[i];
+		if (v->NumReaders) {
+			for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
+				decrease_dependencies(s, r->Reader);
+			}
+		} else {
+			/* This happens in instruction sequences of the type
+			 *  OP r.x, ...;
+			 *  OP r.x, r.x, ...;
+			 * See also the subtlety in how instructions that both
+			 * read and write the same register are scanned.
+			 */
+			if (v->Next)
+				decrease_dependencies(s, v->Next->Writer);
+		}
+	}
+}
+
+static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+	DBG("%i: commit\n", sinst->Instruction->IP);
+
+	commit_update_reads(s, sinst);
+
+	commit_update_writes(s, sinst);
+}
+
+/**
+ * Emit all ready texture instructions in a single block.
+ *
+ * Emit as a single block to (hopefully) sample many textures in parallel,
+ * and to avoid hardware indirections on R300.
+ */
+static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
+{
+	struct schedule_instruction *readytex;
+	struct rc_instruction * inst_begin;
+
+	assert(s->ReadyTEX);
+
+	/* Node marker for R300 */
+	inst_begin = rc_insert_new_instruction(s->C, before->Prev);
+	inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
+
+	/* Link texture instructions back in */
+	readytex = s->ReadyTEX;
+	while(readytex) {
+		rc_insert_instruction(before->Prev, readytex->Instruction);
+		DBG("%i: commit TEX reads\n", readytex->Instruction->IP);
+
+		/* All of the TEX instructions in the same TEX block have
+		 * their source registers read from before any of the
+		 * instructions in that block write to their destination
+		 * registers.  This means that when we commit a TEX
+		 * instruction, any other TEX instruction that wants to write
+		 * to one of the committed instruction's source register can be
+		 * marked as ready and should be emitted in the same TEX
+		 * block. This prevents the following sequence from being
+		 * emitted in two different TEX blocks:
+		 * 0: TEX temp[0].xyz, temp[1].xy__, 2D[0];
+		 * 1: TEX temp[1].xyz, temp[2].xy__, 2D[0];
+		 */
+		commit_update_reads(s, readytex);
+		readytex = readytex->NextReady;
+	}
+	readytex = s->ReadyTEX;
+	s->ReadyTEX = 0;
+	while(readytex){
+		DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
+		commit_update_writes(s, readytex);
+		readytex = readytex->NextReady;
+	}
+}
+
+/* This is a helper function for destructive_merge_instructions().  It helps
+ * merge presubtract sources from two instructions and makes sure the
+ * presubtract sources end up in the correct spot.  This function assumes that
+ * dst_full is an rgb instruction, meaning that it has a vector instruction(rgb)
+ * but no scalar instruction (alpha).
+ * @return 0 if merging the presubtract sources fails.
+ * @retrun 1 if merging the presubtract sources succeeds.
+ */
+static int merge_presub_sources(
+	struct rc_pair_instruction * dst_full,
+	struct rc_pair_sub_instruction src,
+	unsigned int type)
+{
+	unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
+	struct rc_pair_sub_instruction * dst_sub;
+	const struct rc_opcode_info * info;
+
+	assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);
+
+	switch(type) {
+	case RC_SOURCE_RGB:
+		is_rgb = 1;
+		is_alpha = 0;
+		dst_sub = &dst_full->RGB;
+		break;
+	case RC_SOURCE_ALPHA:
+		is_rgb = 0;
+		is_alpha = 1;
+		dst_sub = &dst_full->Alpha;
+		break;
+	default:
+		assert(0);
+		return 0;
+	}
+
+	info = rc_get_opcode_info(dst_full->RGB.Opcode);
+
+	if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
+		return 0;
+
+	srcp_regs = rc_presubtract_src_reg_count(
+					src.Src[RC_PAIR_PRESUB_SRC].Index);
+	for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
+		unsigned int arg;
+		int free_source;
+		unsigned int one_way = 0;
+		struct rc_pair_instruction_source srcp = src.Src[srcp_src];
+		struct rc_pair_instruction_source temp;
+
+		free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha,
+							srcp.File, srcp.Index);
+
+		/* If free_source < 0 then there are no free source
+		 * slots. */
+		if (free_source < 0)
+			return 0;
+
+		temp = dst_sub->Src[srcp_src];
+		dst_sub->Src[srcp_src] = dst_sub->Src[free_source];
+
+		/* srcp needs src0 and src1 to be the same */
+		if (free_source < srcp_src) {
+			if (!temp.Used)
+				continue;
+			free_source = rc_pair_alloc_source(dst_full, is_rgb,
+					is_alpha, temp.File, temp.Index);
+			if (free_source < 0)
+				return 0;
+			one_way = 1;
+		} else {
+			dst_sub->Src[free_source] = temp;
+		}
+
+		/* If free_source == srcp_src, then the presubtract
+		 * source is already in the correct place. */
+		if (free_source == srcp_src)
+			continue;
+
+		/* Shuffle the sources, so we can put the
+		 * presubtract source in the correct place. */
+		for(arg = 0; arg < info->NumSrcRegs; arg++) {
+			/*If this arg does not read from an rgb source,
+			 * do nothing. */
+			if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle)
+								& type)) {
+				continue;
+			}
+
+			if (dst_full->RGB.Arg[arg].Source == srcp_src)
+				dst_full->RGB.Arg[arg].Source = free_source;
+			/* We need to do this just in case register
+			 * is one of the sources already, but in the
+			 * wrong spot. */
+			else if(dst_full->RGB.Arg[arg].Source == free_source
+							&& !one_way) {
+				dst_full->RGB.Arg[arg].Source = srcp_src;
+			}
+		}
+	}
+	return 1;
+}
+
+
+/* This function assumes that rgb.Alpha and alpha.RGB are unused */
+static int destructive_merge_instructions(
+		struct rc_pair_instruction * rgb,
+		struct rc_pair_instruction * alpha)
+{
+	const struct rc_opcode_info * opcode;
+
+	assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
+	assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
+
+	/* Presubtract registers need to be merged first so that registers
+	 * needed by the presubtract operation can be placed in src0 and/or
+	 * src1. */
+
+	/* Merge the rgb presubtract registers. */
+	if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+		if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
+			return 0;
+		}
+	}
+	/* Merge the alpha presubtract registers */
+	if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
+		if(!merge_presub_sources(rgb,  alpha->Alpha, RC_SOURCE_ALPHA)){
+			return 0;
+		}
+	}
+
+	/* Copy alpha args into rgb */
+	opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
+
+	for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+		unsigned int srcrgb = 0;
+		unsigned int srcalpha = 0;
+		unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
+		rc_register_file file = 0;
+		unsigned int index = 0;
+		int source;
+
+		if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) {
+			srcrgb = 1;
+			file = alpha->RGB.Src[oldsrc].File;
+			index = alpha->RGB.Src[oldsrc].Index;
+		} else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) {
+			srcalpha = 1;
+			file = alpha->Alpha.Src[oldsrc].File;
+			index = alpha->Alpha.Src[oldsrc].Index;
+		}
+
+		source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
+		if (source < 0)
+			return 0;
+
+		rgb->Alpha.Arg[arg].Source = source;
+		rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
+		rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
+		rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
+	}
+
+	/* Copy alpha opcode into rgb */
+	rgb->Alpha.Opcode = alpha->Alpha.Opcode;
+	rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
+	rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
+	rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
+	rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
+	rgb->Alpha.Saturate = alpha->Alpha.Saturate;
+
+	/* Merge ALU result writing */
+	if (alpha->WriteALUResult) {
+		if (rgb->WriteALUResult)
+			return 0;
+
+		rgb->WriteALUResult = alpha->WriteALUResult;
+		rgb->ALUResultCompare = alpha->ALUResultCompare;
+	}
+
+	return 1;
+}
+
+/**
+ * Try to merge the given instructions into the rgb instructions.
+ *
+ * Return true on success; on failure, return false, and keep
+ * the instructions untouched.
+ */
+static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
+{
+	struct rc_pair_instruction backup;
+
+	/*Instructions can't write output registers and ALU result at the
+	 * same time. */
+	if ((rgb->WriteALUResult && alpha->Alpha.OutputWriteMask)
+		|| (rgb->RGB.OutputWriteMask && alpha->WriteALUResult)) {
+		return 0;
+	}
+	memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
+
+	if (destructive_merge_instructions(rgb, alpha))
+		return 1;
+
+	memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
+	return 0;
+}
+
+static void presub_nop(struct rc_instruction * emitted) {
+	int prev_rgb_index, prev_alpha_index, i, num_src;
+
+	/* We don't need a nop if the previous instruction is a TEX. */
+	if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
+		return;
+	}
+	if (emitted->Prev->U.P.RGB.WriteMask)
+		prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
+	else
+		prev_rgb_index = -1;
+	if (emitted->Prev->U.P.Alpha.WriteMask)
+		prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
+	else
+		prev_alpha_index = 1;
+
+	/* Check the previous rgb instruction */
+	if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+		num_src = rc_presubtract_src_reg_count(
+				emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
+		for (i = 0; i < num_src; i++) {
+			unsigned int index = emitted->U.P.RGB.Src[i].Index;
+			if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
+			    && (index  == prev_rgb_index
+				|| index == prev_alpha_index)) {
+				emitted->Prev->U.P.Nop = 1;
+				return;
+			}
+		}
+	}
+
+	/* Check the previous alpha instruction. */
+	if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
+		return;
+
+	num_src = rc_presubtract_src_reg_count(
+				emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
+	for (i = 0; i < num_src; i++) {
+		unsigned int index = emitted->U.P.Alpha.Src[i].Index;
+		if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
+		   && (index == prev_rgb_index || index == prev_alpha_index)) {
+			emitted->Prev->U.P.Nop = 1;
+			return;
+		}
+	}
+}
+
+static void rgb_to_alpha_remap (
+	struct rc_instruction * inst,
+	struct rc_pair_instruction_arg * arg,
+	rc_register_file old_file,
+	rc_swizzle old_swz,
+	unsigned int new_index)
+{
+	int new_src_index;
+	unsigned int i;
+
+	for (i = 0; i < 3; i++) {
+		if (get_swz(arg->Swizzle, i) == old_swz) {
+			SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W);
+		}
+	}
+	new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1,
+							old_file, new_index);
+	/* This conversion is not possible, we must have made a mistake in
+	 * is_rgb_to_alpha_possible. */
+	if (new_src_index < 0) {
+		assert(0);
+		return;
+	}
+
+	arg->Source = new_src_index;
+}
+
+static int can_remap(unsigned int opcode)
+{
+	switch(opcode) {
+	case RC_OPCODE_DDX:
+	case RC_OPCODE_DDY:
+		return 0;
+	default:
+		return 1;
+	}
+}
+
+static int can_convert_opcode_to_alpha(unsigned int opcode)
+{
+	switch(opcode) {
+	case RC_OPCODE_DDX:
+	case RC_OPCODE_DDY:
+	case RC_OPCODE_DP2:
+	case RC_OPCODE_DP3:
+	case RC_OPCODE_DP4:
+	case RC_OPCODE_DPH:
+		return 0;
+	default:
+		return 1;
+	}
+}
+
+static void is_rgb_to_alpha_possible(
+	void * userdata,
+	struct rc_instruction * inst,
+	struct rc_pair_instruction_arg * arg,
+	struct rc_pair_instruction_source * src)
+{
+	unsigned int chan_count = 0;
+	unsigned int alpha_sources = 0;
+	unsigned int i;
+	struct rc_reader_data * reader_data = userdata;
+
+	if (!can_remap(inst->U.P.RGB.Opcode)
+	    || !can_remap(inst->U.P.Alpha.Opcode)) {
+		reader_data->Abort = 1;
+		return;
+	}
+
+	if (!src)
+		return;
+
+	/* XXX There are some cases where we can still do the conversion if
+	 * a reader reads from a presubtract source, but for now we'll prevent
+	 * it. */
+	if (arg->Source == RC_PAIR_PRESUB_SRC) {
+		reader_data->Abort = 1;
+		return;
+	}
+
+	/* Make sure the source only reads from one component.
+	 * XXX We should allow the source to read from the same component twice.
+	 * XXX If the index we will be converting to is the same as the
+	 * current index, then it is OK to read from more than one component.
+	 */
+	for (i = 0; i < 3; i++) {
+		rc_swizzle swz = get_swz(arg->Swizzle, i);
+		switch(swz) {
+		case RC_SWIZZLE_X:
+		case RC_SWIZZLE_Y:
+		case RC_SWIZZLE_Z:
+		case RC_SWIZZLE_W:
+			chan_count++;
+			break;
+		default:
+			break;
+		}
+	}
+	if (chan_count > 1) {
+		reader_data->Abort = 1;
+		return;
+	}
+
+	/* Make sure there are enough alpha sources.
+	 * XXX If we know what register all the readers are going
+	 * to be remapped to, then in some situations we can still do
+	 * the subsitution, even if all 3 alpha sources are being used.*/
+	for (i = 0; i < 3; i++) {
+		if (inst->U.P.Alpha.Src[i].Used) {
+			alpha_sources++;
+		}
+	}
+	if (alpha_sources > 2) {
+		reader_data->Abort = 1;
+		return;
+	}
+}
+
+static int convert_rgb_to_alpha(
+	struct schedule_state * s,
+	struct schedule_instruction * sched_inst)
+{
+	struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
+	unsigned int old_mask = pair_inst->RGB.WriteMask;
+	unsigned int old_swz = rc_mask_to_swizzle(old_mask);
+	const struct rc_opcode_info * info =
+				rc_get_opcode_info(pair_inst->RGB.Opcode);
+	int new_index = -1;
+	unsigned int i;
+
+	if (sched_inst->GlobalReaders.Abort)
+		return 0;
+
+	if (!pair_inst->RGB.WriteMask)
+		return 0;
+
+	if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
+	    || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
+		return 0;
+	}
+
+	assert(sched_inst->NumWriteValues == 1);
+
+	if (!sched_inst->WriteValues[0]) {
+		assert(0);
+		return 0;
+	}
+
+	/* We start at the old index, because if we can reuse the same
+	 * register and just change the swizzle then it is more likely we
+	 * will be able to convert all the readers. */
+	for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
+		struct reg_value ** new_regvalp = get_reg_valuep(
+						s, RC_FILE_TEMPORARY, i, 3);
+		if (!*new_regvalp) {
+			struct reg_value ** old_regvalp =
+				get_reg_valuep(s,
+					RC_FILE_TEMPORARY,
+					pair_inst->RGB.DestIndex,
+					rc_mask_to_swizzle(old_mask));
+			new_index = i;
+			*new_regvalp = *old_regvalp;
+			*old_regvalp = NULL;
+			new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3);
+			break;
+		}
+	}
+	if (new_index < 0) {
+		return 0;
+	}
+
+	pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
+	pair_inst->Alpha.DestIndex = new_index;
+	pair_inst->Alpha.WriteMask = RC_MASK_W;
+	pair_inst->Alpha.Target = pair_inst->RGB.Target;
+	pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
+	pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
+	pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
+	memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
+						sizeof(pair_inst->Alpha.Arg));
+	/* Move the swizzles into the first chan */
+	for (i = 0; i < info->NumSrcRegs; i++) {
+		unsigned int j;
+		for (j = 0; j < 3; j++) {
+			unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
+			if (swz != RC_SWIZZLE_UNUSED) {
+				pair_inst->Alpha.Arg[i].Swizzle =
+							rc_init_swizzle(swz, 1);
+				break;
+			}
+		}
+	}
+	pair_inst->RGB.Opcode = RC_OPCODE_NOP;
+	pair_inst->RGB.DestIndex = 0;
+	pair_inst->RGB.WriteMask = 0;
+	pair_inst->RGB.Target = 0;
+	pair_inst->RGB.OutputWriteMask = 0;
+	pair_inst->RGB.DepthWriteMask = 0;
+	pair_inst->RGB.Saturate = 0;
+	memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));
+
+	for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
+		struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
+		rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg,
+					RC_FILE_TEMPORARY, old_swz, new_index);
+	}
+	return 1;
+}
+
+/**
+ * Find a good ALU instruction or pair of ALU instruction and emit it.
+ *
+ * Prefer emitting full ALU instructions, so that when we reach a point
+ * where no full ALU instruction can be emitted, we have more candidates
+ * for RGB/Alpha pairing.
+ */
+static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before)
+{
+	struct schedule_instruction * sinst;
+
+	if (s->ReadyFullALU) {
+		sinst = s->ReadyFullALU;
+		s->ReadyFullALU = s->ReadyFullALU->NextReady;
+		rc_insert_instruction(before->Prev, sinst->Instruction);
+		commit_alu_instruction(s, sinst);
+	} else {
+		struct schedule_instruction **prgb;
+		struct schedule_instruction **palpha;
+		struct schedule_instruction *prev;
+pair:
+		/* Some pairings might fail because they require too
+		 * many source slots; try all possible pairings if necessary */
+		for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
+			for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) {
+				struct schedule_instruction * psirgb = *prgb;
+				struct schedule_instruction * psialpha = *palpha;
+
+				if (!merge_instructions(&psirgb->Instruction->U.P, &psialpha->Instruction->U.P))
+					continue;
+
+				*prgb = (*prgb)->NextReady;
+				*palpha = (*palpha)->NextReady;
+				rc_insert_instruction(before->Prev, psirgb->Instruction);
+				commit_alu_instruction(s, psirgb);
+				commit_alu_instruction(s, psialpha);
+				goto success;
+			}
+		}
+		prev = NULL;
+		/* No success in pairing, now try to convert one of the RGB
+		 * instructions to an Alpha so we can pair it with another RGB.
+		 */
+		if (s->ReadyRGB && s->ReadyRGB->NextReady) {
+		for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
+			if ((*prgb)->NumWriteValues == 1) {
+				struct schedule_instruction * prgb_next;
+				if (!convert_rgb_to_alpha(s, *prgb))
+					goto cont_loop;
+				prgb_next = (*prgb)->NextReady;
+				/* Add instruction to the Alpha ready list. */
+				(*prgb)->NextReady = s->ReadyAlpha;
+				s->ReadyAlpha = *prgb;
+				/* Remove instruction from the RGB ready list.*/
+				if (prev)
+					prev->NextReady = prgb_next;
+				else
+					s->ReadyRGB = prgb_next;
+				goto pair;
+			}
+cont_loop:
+			prev = *prgb;
+		}
+		}
+		/* Still no success in pairing, just take the first RGB
+		 * or alpha instruction. */
+		if (s->ReadyRGB) {
+			sinst = s->ReadyRGB;
+			s->ReadyRGB = s->ReadyRGB->NextReady;
+		} else if (s->ReadyAlpha) {
+			sinst = s->ReadyAlpha;
+			s->ReadyAlpha = s->ReadyAlpha->NextReady;
+		} else {
+			/*XXX Something real bad has happened. */
+			assert(0);
+		}
+
+		rc_insert_instruction(before->Prev, sinst->Instruction);
+		commit_alu_instruction(s, sinst);
+	success: ;
+	}
+	/* If the instruction we just emitted uses a presubtract value, and
+	 * the presubtract sources were written by the previous intstruction,
+	 * the previous instruction needs a nop. */
+	presub_nop(before->Prev);
+}
+
+static void scan_read(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int chan)
+{
+	struct schedule_state * s = data;
+	struct reg_value ** v = get_reg_valuep(s, file, index, chan);
+	struct reg_value_reader * reader;
+
+	if (!v)
+		return;
+
+	if (*v && (*v)->Writer == s->Current) {
+		/* The instruction reads and writes to a register component.
+		 * In this case, we only want to increment dependencies by one.
+		 */
+		return;
+	}
+
+	DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
+
+	reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
+	reader->Reader = s->Current;
+	if (!*v) {
+		/* In this situation, the instruction reads from a register
+		 * that hasn't been written to or read from in the current
+		 * block. */
+		*v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
+		memset(*v, 0, sizeof(struct reg_value));
+		(*v)->Readers = reader;
+	} else {
+		reader->Next = (*v)->Readers;
+		(*v)->Readers = reader;
+		/* Only update the current instruction's dependencies if the
+		 * register it reads from has been written to in this block. */
+		if ((*v)->Writer) {
+			s->Current->NumDependencies++;
+		}
+	}
+	(*v)->NumReaders++;
+
+	if (s->Current->NumReadValues >= 12) {
+		rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
+	} else {
+		s->Current->ReadValues[s->Current->NumReadValues++] = *v;
+	}
+}
+
+static void scan_write(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int chan)
+{
+	struct schedule_state * s = data;
+	struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
+	struct reg_value * newv;
+
+	if (!pv)
+		return;
+
+	DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
+
+	newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
+	memset(newv, 0, sizeof(*newv));
+
+	newv->Writer = s->Current;
+
+	if (*pv) {
+		(*pv)->Next = newv;
+		s->Current->NumDependencies++;
+	}
+
+	*pv = newv;
+
+	if (s->Current->NumWriteValues >= 4) {
+		rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
+	} else {
+		s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
+	}
+}
+
+static void is_rgb_to_alpha_possible_normal(
+	void * userdata,
+	struct rc_instruction * inst,
+	struct rc_src_register * src)
+{
+	struct rc_reader_data * reader_data = userdata;
+	reader_data->Abort = 1;
+
+}
+
+static void schedule_block(struct r300_fragment_program_compiler * c,
+		struct rc_instruction * begin, struct rc_instruction * end)
+{
+	struct schedule_state s;
+	unsigned int ip;
+
+	memset(&s, 0, sizeof(s));
+	s.C = &c->Base;
+
+	/* Scan instructions for data dependencies */
+	ip = 0;
+	for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
+		s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current));
+		memset(s.Current, 0, sizeof(struct schedule_instruction));
+
+		s.Current->Instruction = inst;
+		inst->IP = ip++;
+
+		DBG("%i: Scanning\n", inst->IP);
+
+		/* The order of things here is subtle and maybe slightly
+		 * counter-intuitive, to account for the case where an
+		 * instruction writes to the same register as it reads
+		 * from. */
+		rc_for_all_writes_chan(inst, &scan_write, &s);
+		rc_for_all_reads_chan(inst, &scan_read, &s);
+
+		DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies);
+
+		if (!s.Current->NumDependencies)
+			instruction_ready(&s, s.Current);
+
+		/* Get global readers for possible RGB->Alpha conversion. */
+		s.Current->GlobalReaders.ExitOnAbort = 1;
+		rc_get_readers(s.C, inst, &s.Current->GlobalReaders,
+				is_rgb_to_alpha_possible_normal,
+				is_rgb_to_alpha_possible, NULL);
+	}
+
+	/* Temporarily unlink all instructions */
+	begin->Prev->Next = end;
+	end->Prev = begin->Prev;
+
+	/* Schedule instructions back */
+	while(!s.C->Error &&
+	      (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
+		if (s.ReadyTEX)
+			emit_all_tex(&s, end);
+
+		while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha))
+			emit_one_alu(&s, end);
+	}
+}
+
+static int is_controlflow(struct rc_instruction * inst)
+{
+	if (inst->Type == RC_INSTRUCTION_NORMAL) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+		return opcode->IsFlowControl;
+	}
+	return 0;
+}
+
+void rc_pair_schedule(struct radeon_compiler *cc, void *user)
+{
+	struct schedule_state s;
+
+	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
+	struct rc_instruction * inst = c->Base.Program.Instructions.Next;
+
+	memset(&s, 0, sizeof(s));
+	s.C = &c->Base;
+	while(inst != &c->Base.Program.Instructions) {
+		struct rc_instruction * first;
+
+		if (is_controlflow(inst)) {
+			inst = inst->Next;
+			continue;
+		}
+
+		first = inst;
+
+		while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
+			inst = inst->Next;
+
+		DBG("Schedule one block\n");
+		schedule_block(c, first, inst);
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_translate.c b/src/gallium/drivers/r300/compiler/radeon_pair_translate.c
new file mode 100644
index 00000000000..2dae56a2428
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_pair_translate.c
@@ -0,0 +1,359 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+
+
+/**
+ * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
+ * and reverse the order of arguments for CMP.
+ */
+static void final_rewrite(struct rc_sub_instruction *inst)
+{
+	struct rc_src_register tmp;
+
+	switch(inst->Opcode) {
+	case RC_OPCODE_ADD:
+		inst->SrcReg[2] = inst->SrcReg[1];
+		inst->SrcReg[1].File = RC_FILE_NONE;
+		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
+		inst->SrcReg[1].Negate = RC_MASK_NONE;
+		inst->Opcode = RC_OPCODE_MAD;
+		break;
+	case RC_OPCODE_CMP:
+		tmp = inst->SrcReg[2];
+		inst->SrcReg[2] = inst->SrcReg[0];
+		inst->SrcReg[0] = tmp;
+		break;
+	case RC_OPCODE_MOV:
+		/* AMD say we should use CMP.
+		 * However, when we transform
+		 *  KIL -r0;
+		 * into
+		 *  CMP tmp, -r0, -r0, 0;
+		 *  KIL tmp;
+		 * we get incorrect behaviour on R500 when r0 == 0.0.
+		 * It appears that the R500 KIL hardware treats -0.0 as less
+		 * than zero.
+		 */
+		inst->SrcReg[1].File = RC_FILE_NONE;
+		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
+		inst->SrcReg[2].File = RC_FILE_NONE;
+		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
+		inst->Opcode = RC_OPCODE_MAD;
+		break;
+	case RC_OPCODE_MUL:
+		inst->SrcReg[2].File = RC_FILE_NONE;
+		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
+		inst->Opcode = RC_OPCODE_MAD;
+		break;
+	default:
+		/* nothing to do */
+		break;
+	}
+}
+
+
+/**
+ * Classify an instruction according to which ALUs etc. it needs
+ */
+static void classify_instruction(struct rc_sub_instruction * inst,
+	int * needrgb, int * needalpha, int * istranscendent)
+{
+	*needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
+	*needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
+	*istranscendent = 0;
+
+	if (inst->WriteALUResult == RC_ALURESULT_X)
+		*needrgb = 1;
+	else if (inst->WriteALUResult == RC_ALURESULT_W)
+		*needalpha = 1;
+
+	switch(inst->Opcode) {
+	case RC_OPCODE_ADD:
+	case RC_OPCODE_CMP:
+	case RC_OPCODE_CND:
+	case RC_OPCODE_DDX:
+	case RC_OPCODE_DDY:
+	case RC_OPCODE_FRC:
+	case RC_OPCODE_MAD:
+	case RC_OPCODE_MAX:
+	case RC_OPCODE_MIN:
+	case RC_OPCODE_MOV:
+	case RC_OPCODE_MUL:
+		break;
+	case RC_OPCODE_COS:
+	case RC_OPCODE_EX2:
+	case RC_OPCODE_LG2:
+	case RC_OPCODE_RCP:
+	case RC_OPCODE_RSQ:
+	case RC_OPCODE_SIN:
+		*istranscendent = 1;
+		*needalpha = 1;
+		break;
+	case RC_OPCODE_DP4:
+		*needalpha = 1;
+		/* fall through */
+	case RC_OPCODE_DP3:
+		*needrgb = 1;
+		break;
+	default:
+		break;
+	}
+}
+
+static void src_uses(struct rc_src_register src, unsigned int * rgb,
+							unsigned int * alpha)
+{
+	int j;
+	for(j = 0; j < 4; ++j) {
+		unsigned int swz = GET_SWZ(src.Swizzle, j);
+		if (swz < 3)
+			*rgb = 1;
+		else if (swz < 4)
+			*alpha = 1;
+	}
+}
+
+/**
+ * Fill the given ALU instruction's opcodes and source operands into the given pair,
+ * if possible.
+ */
+static void set_pair_instruction(struct r300_fragment_program_compiler *c,
+	struct rc_pair_instruction * pair,
+	struct rc_sub_instruction * inst)
+{
+	int needrgb, needalpha, istranscendent;
+	const struct rc_opcode_info * opcode;
+	int i;
+
+	memset(pair, 0, sizeof(struct rc_pair_instruction));
+
+	classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
+
+	if (needrgb) {
+		if (istranscendent)
+			pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
+		else
+			pair->RGB.Opcode = inst->Opcode;
+		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
+			pair->RGB.Saturate = 1;
+	}
+	if (needalpha) {
+		pair->Alpha.Opcode = inst->Opcode;
+		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
+			pair->Alpha.Saturate = 1;
+	}
+
+	opcode = rc_get_opcode_info(inst->Opcode);
+
+	/* Presubtract handling:
+	 * We need to make sure that the values used by the presubtract
+	 * operation end up in src0 or src1. */
+	if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
+		/* rc_pair_alloc_source() will fill in data for
+		 * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
+		int j;
+		for(j = 0; j < 3; j++) {
+			int src_regs;
+			if(inst->SrcReg[j].File != RC_FILE_PRESUB)
+				continue;
+
+			src_regs = rc_presubtract_src_reg_count(
+							inst->PreSub.Opcode);
+			for(i = 0; i < src_regs; i++) {
+				unsigned int rgb = 0;
+				unsigned int alpha = 0;
+				src_uses(inst->SrcReg[j], &rgb, &alpha);
+				if(rgb) {
+					pair->RGB.Src[i].File =
+						inst->PreSub.SrcReg[i].File;
+					pair->RGB.Src[i].Index =
+						inst->PreSub.SrcReg[i].Index;
+					pair->RGB.Src[i].Used = 1;
+				}
+				if(alpha) {
+					pair->Alpha.Src[i].File =
+						inst->PreSub.SrcReg[i].File;
+					pair->Alpha.Src[i].Index =
+						inst->PreSub.SrcReg[i].Index;
+					pair->Alpha.Src[i].Used = 1;
+				}
+			}
+		}
+	}
+
+	for(i = 0; i < opcode->NumSrcRegs; ++i) {
+		int source;
+		if (needrgb && !istranscendent) {
+			unsigned int srcrgb = 0;
+			unsigned int srcalpha = 0;
+			unsigned int srcmask = 0;
+			int j;
+			/* We don't care about the alpha channel here.  We only
+			 * want the part of the swizzle that writes to rgb,
+			 * since we are creating an rgb instruction. */
+			for(j = 0; j < 3; ++j) {
+				unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
+
+				if (swz < RC_SWIZZLE_W)
+					srcrgb = 1;
+				else if (swz == RC_SWIZZLE_W)
+					srcalpha = 1;
+
+				if (swz < RC_SWIZZLE_UNUSED)
+					srcmask |= 1 << j;
+			}
+			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
+							inst->SrcReg[i].File, inst->SrcReg[i].Index);
+			if (source < 0) {
+				rc_error(&c->Base, "Failed to translate "
+							"rgb instruction.\n");
+				return;
+			}
+			pair->RGB.Arg[i].Source = source;
+			pair->RGB.Arg[i].Swizzle =
+				rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
+			pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
+			pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
+		}
+		if (needalpha) {
+			unsigned int srcrgb = 0;
+			unsigned int srcalpha = 0;
+			unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, istranscendent ? 0 : 3);
+			if (swz < 3)
+				srcrgb = 1;
+			else if (swz < 4)
+				srcalpha = 1;
+			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
+							inst->SrcReg[i].File, inst->SrcReg[i].Index);
+			if (source < 0) {
+				rc_error(&c->Base, "Failed to translate "
+							"alpha instruction.\n");
+				return;
+			}
+			pair->Alpha.Arg[i].Source = source;
+			pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
+			pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
+			pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W);
+		}
+	}
+
+	/* Destination handling */
+	if (inst->DstReg.File == RC_FILE_OUTPUT) {
+        if (inst->DstReg.Index == c->OutputDepth) {
+            pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+        } else {
+            for (i = 0; i < 4; i++) {
+                if (inst->DstReg.Index == c->OutputColor[i]) {
+                    pair->RGB.Target = i;
+                    pair->Alpha.Target = i;
+                    pair->RGB.OutputWriteMask |=
+                        inst->DstReg.WriteMask & RC_MASK_XYZ;
+                    pair->Alpha.OutputWriteMask |=
+                        GET_BIT(inst->DstReg.WriteMask, 3);
+                    break;
+                }
+            }
+        }
+	} else {
+		if (needrgb) {
+			pair->RGB.DestIndex = inst->DstReg.Index;
+			pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
+		}
+
+		if (needalpha) {
+			pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
+			if (pair->Alpha.WriteMask) {
+				pair->Alpha.DestIndex = inst->DstReg.Index;
+			}
+		}
+	}
+
+	if (inst->WriteALUResult) {
+		pair->WriteALUResult = inst->WriteALUResult;
+		pair->ALUResultCompare = inst->ALUResultCompare;
+	}
+}
+
+
+static void check_opcode_support(struct r300_fragment_program_compiler *c,
+				 struct rc_sub_instruction *inst)
+{
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+
+	if (opcode->HasDstReg) {
+		if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
+			rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
+			return;
+		}
+	}
+
+	for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
+		if (inst->SrcReg[i].RelAddr) {
+			rc_error(&c->Base, "Fragment program does not support relative addressing "
+				 " of source operands.\n");
+			return;
+		}
+	}
+}
+
+
+/**
+ * Translate all ALU instructions into corresponding pair instructions,
+ * performing no other changes.
+ */
+void rc_pair_translate(struct radeon_compiler *cc, void *user)
+{
+	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
+
+	for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
+	    inst != &c->Base.Program.Instructions;
+	    inst = inst->Next) {
+		const struct rc_opcode_info * opcode;
+		struct rc_sub_instruction copy;
+
+		if (inst->Type != RC_INSTRUCTION_NORMAL)
+			continue;
+
+		opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
+			continue;
+
+		copy = inst->U.I;
+
+		check_opcode_support(c, &copy);
+
+		final_rewrite(&copy);
+		inst->Type = RC_INSTRUCTION_PAIR;
+		set_pair_instruction(c, &inst->U.P, &copy);
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_program.c b/src/gallium/drivers/r300/compiler/radeon_program.c
new file mode 100644
index 00000000000..fe5756ebc45
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+
+/**
+ * Transform the given clause in the following way:
+ *  1. Replace it with an empty clause
+ *  2. For every instruction in the original clause, try the given
+ *     transformations in order.
+ *  3. If one of the transformations returns GL_TRUE, assume that it
+ *     has emitted the appropriate instruction(s) into the new clause;
+ *     otherwise, copy the instruction verbatim.
+ *
+ * \note The transformation is currently not recursive; in other words,
+ * instructions emitted by transformations are not transformed.
+ *
+ * \note The transform is called 'local' because it can only look at
+ * one instruction at a time.
+ */
+void rc_local_transform(
+	struct radeon_compiler * c,
+	void *user)
+{
+	struct radeon_program_transformation *transformations =
+		(struct radeon_program_transformation*)user;
+	struct rc_instruction * inst = c->Program.Instructions.Next;
+
+	while(inst != &c->Program.Instructions) {
+		struct rc_instruction * current = inst;
+		int i;
+
+		inst = inst->Next;
+
+		for(i = 0; transformations[i].function; ++i) {
+			struct radeon_program_transformation* t = transformations + i;
+
+			if (t->function(c, current, t->userData))
+				break;
+		}
+	}
+}
+
+struct get_used_temporaries_data {
+	unsigned char * Used;
+	unsigned int UsedLength;
+};
+
+static void get_used_temporaries_cb(
+	void * userdata,
+	struct rc_instruction * inst,
+	rc_register_file file,
+	unsigned int index,
+	unsigned int mask)
+{
+	struct get_used_temporaries_data * d = userdata;
+
+	if (file != RC_FILE_TEMPORARY)
+		return;
+
+	if (index >= d->UsedLength)
+		return;
+
+	d->Used[index] |= mask;
+}
+
+/**
+ * This function fills in the parameter 'used' with a writemask that
+ * represent which components of each temporary register are used by the
+ * program.  This is meant to be combined with rc_find_free_temporary_list as a
+ * more efficient version of rc_find_free_temporary.
+ * @param used The function does not initialize this parameter.
+ */
+void rc_get_used_temporaries(
+	struct radeon_compiler * c,
+	unsigned char * used,
+	unsigned int used_length)
+{
+	struct rc_instruction * inst;
+	struct get_used_temporaries_data d;
+	d.Used = used;
+	d.UsedLength = used_length;
+
+	for(inst = c->Program.Instructions.Next;
+			inst != &c->Program.Instructions; inst = inst->Next) {
+
+		rc_for_all_reads_mask(inst, get_used_temporaries_cb, &d);
+		rc_for_all_writes_mask(inst, get_used_temporaries_cb, &d);
+	}
+}
+
+/* Search a list of used temporaries for a free one
+ * \sa rc_get_used_temporaries
+ * @note If this functions finds a free temporary, it will mark it as used
+ * in the used temporary list (param 'used')
+ * @param used list of used temporaries
+ * @param used_length number of items in param 'used'
+ * @param mask which components must be free in the temporary index that is
+ * returned.
+ * @return -1 If there are no more free temporaries, otherwise the index of
+ * a temporary register where the components specified in param 'mask' are
+ * not being used.
+ */
+int rc_find_free_temporary_list(
+	struct radeon_compiler * c,
+	unsigned char * used,
+	unsigned int used_length,
+	unsigned int mask)
+{
+	int i;
+	for(i = 0; i < used_length; i++) {
+		if ((~used[i] & mask) == mask) {
+			used[i] |= mask;
+			return i;
+		}
+	}
+	return -1;
+}
+
+unsigned int rc_find_free_temporary(struct radeon_compiler * c)
+{
+	unsigned char used[RC_REGISTER_MAX_INDEX];
+	int free;
+
+	memset(used, 0, sizeof(used));
+
+	rc_get_used_temporaries(c, used, RC_REGISTER_MAX_INDEX);
+
+	free = rc_find_free_temporary_list(c, used, RC_REGISTER_MAX_INDEX,
+								RC_MASK_XYZW);
+	if (free < 0) {
+		rc_error(c, "Ran out of temporary registers\n");
+		return 0;
+	}
+	return free;
+}
+
+
+struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c)
+{
+	struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction));
+
+	memset(inst, 0, sizeof(struct rc_instruction));
+
+	inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
+	inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+	inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW;
+	inst->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW;
+
+	return inst;
+}
+
+void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst)
+{
+	inst->Prev = after;
+	inst->Next = after->Next;
+
+	inst->Prev->Next = inst;
+	inst->Next->Prev = inst;
+}
+
+struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after)
+{
+	struct rc_instruction * inst = rc_alloc_instruction(c);
+
+	rc_insert_instruction(after, inst);
+
+	return inst;
+}
+
+void rc_remove_instruction(struct rc_instruction * inst)
+{
+	inst->Prev->Next = inst->Next;
+	inst->Next->Prev = inst->Prev;
+}
+
+/**
+ * Return the number of instructions in the program.
+ */
+unsigned int rc_recompute_ips(struct radeon_compiler * c)
+{
+	unsigned int ip = 0;
+	struct rc_instruction * inst;
+
+	for(inst = c->Program.Instructions.Next;
+	    inst != &c->Program.Instructions;
+	    inst = inst->Next) {
+		inst->IP = ip++;
+	}
+
+	c->Program.Instructions.IP = 0xcafedead;
+
+	return ip;
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_program.h b/src/gallium/drivers/r300/compiler/radeon_program.h
new file mode 100644
index 00000000000..b899eccbf53
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program.h
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_H_
+#define __RADEON_PROGRAM_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#include "radeon_opcodes.h"
+#include "radeon_code.h"
+#include "radeon_program_constants.h"
+#include "radeon_program_pair.h"
+
+struct radeon_compiler;
+
+struct rc_src_register {
+	unsigned int File:4;
+
+	/** Negative values may be used for relative addressing. */
+	signed int Index:(RC_REGISTER_INDEX_BITS+1);
+	unsigned int RelAddr:1;
+
+	unsigned int Swizzle:12;
+
+	/** Take the component-wise absolute value */
+	unsigned int Abs:1;
+
+	/** Post-Abs negation. */
+	unsigned int Negate:4;
+};
+
+struct rc_dst_register {
+	unsigned int File:3;
+	unsigned int Index:RC_REGISTER_INDEX_BITS;
+	unsigned int WriteMask:4;
+};
+
+struct rc_presub_instruction {
+	rc_presubtract_op Opcode;
+	struct rc_src_register SrcReg[2];
+};
+
+/**
+ * Instructions are maintained by the compiler in a doubly linked list
+ * of these structures.
+ *
+ * This instruction format is intended to be expanded for hardware-specific
+ * trickery. At different stages of compilation, a different set of
+ * instruction types may be valid.
+ */
+struct rc_sub_instruction {
+	struct rc_src_register SrcReg[3];
+	struct rc_dst_register DstReg;
+
+	/**
+	 * Opcode of this instruction, according to \ref rc_opcode enums.
+	 */
+	unsigned int Opcode:8;
+
+	/**
+	 * Saturate each value of the result to the range [0,1] or [-1,1],
+	 * according to \ref rc_saturate_mode enums.
+	 */
+	unsigned int SaturateMode:2;
+
+	/**
+	 * Writing to the special register RC_SPECIAL_ALU_RESULT
+	 */
+	/*@{*/
+	unsigned int WriteALUResult:2;
+	unsigned int ALUResultCompare:3;
+	/*@}*/
+
+	/**
+	 * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions.
+	 */
+	/*@{*/
+	/** Source texture unit. */
+	unsigned int TexSrcUnit:5;
+
+	/** Source texture target, one of the \ref rc_texture_target enums */
+	unsigned int TexSrcTarget:3;
+
+	/** True if tex instruction should do shadow comparison */
+	unsigned int TexShadow:1;
+
+	/**R500 Only.  How to swizzle the result of a TEX lookup*/
+	unsigned int TexSwizzle:12;
+	/*@}*/
+
+	/** This holds information about the presubtract operation used by
+	 * this instruction. */
+	struct rc_presub_instruction PreSub;
+};
+
+typedef enum {
+	RC_INSTRUCTION_NORMAL = 0,
+	RC_INSTRUCTION_PAIR
+} rc_instruction_type;
+
+struct rc_instruction {
+	struct rc_instruction * Prev;
+	struct rc_instruction * Next;
+
+	rc_instruction_type Type;
+	union {
+		struct rc_sub_instruction I;
+		struct rc_pair_instruction P;
+	} U;
+
+	/**
+	 * Warning: IPs are not stable. If you want to use them,
+	 * you need to recompute them at the beginning of each pass
+	 * using \ref rc_recompute_ips
+	 */
+	unsigned int IP;
+};
+
+struct rc_program {
+	/**
+	 * Instructions.Next points to the first instruction,
+	 * Instructions.Prev points to the last instruction.
+	 */
+	struct rc_instruction Instructions;
+
+	/* Long term, we should probably remove InputsRead & OutputsWritten,
+	 * since updating dependent state can be fragile, and they aren't
+	 * actually used very often. */
+	uint32_t InputsRead;
+	uint32_t OutputsWritten;
+	uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */
+
+	struct rc_constant_list Constants;
+};
+
+/**
+ * A transformation that can be passed to \ref rc_local_transform.
+ *
+ * The function will be called once for each instruction.
+ * It has to either emit the appropriate transformed code for the instruction
+ * and return true, or return false if it doesn't understand the
+ * instruction.
+ *
+ * The function gets passed the userData as last parameter.
+ */
+struct radeon_program_transformation {
+	int (*function)(
+		struct radeon_compiler*,
+		struct rc_instruction*,
+		void*);
+	void *userData;
+};
+
+void rc_local_transform(
+	struct radeon_compiler *c,
+	void *user);
+
+void rc_get_used_temporaries(
+	struct radeon_compiler * c,
+	unsigned char * used,
+	unsigned int used_length);
+
+int rc_find_free_temporary_list(
+	struct radeon_compiler * c,
+	unsigned char * used,
+	unsigned int used_length,
+	unsigned int mask);
+
+unsigned int rc_find_free_temporary(struct radeon_compiler * c);
+
+struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
+struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after);
+void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst);
+void rc_remove_instruction(struct rc_instruction * inst);
+
+unsigned int rc_recompute_ips(struct radeon_compiler * c);
+
+void rc_print_program(const struct rc_program *prog);
+
+rc_swizzle rc_mask_to_swizzle(unsigned int mask);
+#endif
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
new file mode 100644
index 00000000000..9fc991166a3
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
@@ -0,0 +1,1154 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ *
+ * Shareable transformations that transform "special" ALU instructions
+ * into ALU instructions that are supported by hardware.
+ *
+ */
+
+#include "radeon_program_alu.h"
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+
+
+static struct rc_instruction *emit1(
+	struct radeon_compiler * c, struct rc_instruction * after,
+	rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+	struct rc_src_register SrcReg)
+{
+	struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+
+	fpi->U.I.Opcode = Opcode;
+	fpi->U.I.SaturateMode = Saturate;
+	fpi->U.I.DstReg = DstReg;
+	fpi->U.I.SrcReg[0] = SrcReg;
+	return fpi;
+}
+
+static struct rc_instruction *emit2(
+	struct radeon_compiler * c, struct rc_instruction * after,
+	rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+	struct rc_src_register SrcReg0, struct rc_src_register SrcReg1)
+{
+	struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+
+	fpi->U.I.Opcode = Opcode;
+	fpi->U.I.SaturateMode = Saturate;
+	fpi->U.I.DstReg = DstReg;
+	fpi->U.I.SrcReg[0] = SrcReg0;
+	fpi->U.I.SrcReg[1] = SrcReg1;
+	return fpi;
+}
+
+static struct rc_instruction *emit3(
+	struct radeon_compiler * c, struct rc_instruction * after,
+	rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+	struct rc_src_register SrcReg0, struct rc_src_register SrcReg1,
+	struct rc_src_register SrcReg2)
+{
+	struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+
+	fpi->U.I.Opcode = Opcode;
+	fpi->U.I.SaturateMode = Saturate;
+	fpi->U.I.DstReg = DstReg;
+	fpi->U.I.SrcReg[0] = SrcReg0;
+	fpi->U.I.SrcReg[1] = SrcReg1;
+	fpi->U.I.SrcReg[2] = SrcReg2;
+	return fpi;
+}
+
+static struct rc_dst_register dstregtmpmask(int index, int mask)
+{
+	struct rc_dst_register dst = {0};
+	dst.File = RC_FILE_TEMPORARY;
+	dst.Index = index;
+	dst.WriteMask = mask;
+	return dst;
+}
+
+static const struct rc_src_register builtin_zero = {
+	.File = RC_FILE_NONE,
+	.Index = 0,
+	.Swizzle = RC_SWIZZLE_0000
+};
+static const struct rc_src_register builtin_one = {
+	.File = RC_FILE_NONE,
+	.Index = 0,
+	.Swizzle = RC_SWIZZLE_1111
+};
+static const struct rc_src_register srcreg_undefined = {
+	.File = RC_FILE_NONE,
+	.Index = 0,
+	.Swizzle = RC_SWIZZLE_XYZW
+};
+
+static struct rc_src_register srcreg(int file, int index)
+{
+	struct rc_src_register src = srcreg_undefined;
+	src.File = file;
+	src.Index = index;
+	return src;
+}
+
+static struct rc_src_register srcregswz(int file, int index, int swz)
+{
+	struct rc_src_register src = srcreg_undefined;
+	src.File = file;
+	src.Index = index;
+	src.Swizzle = swz;
+	return src;
+}
+
+static struct rc_src_register absolute(struct rc_src_register reg)
+{
+	struct rc_src_register newreg = reg;
+	newreg.Abs = 1;
+	newreg.Negate = RC_MASK_NONE;
+	return newreg;
+}
+
+static struct rc_src_register negate(struct rc_src_register reg)
+{
+	struct rc_src_register newreg = reg;
+	newreg.Negate = newreg.Negate ^ RC_MASK_XYZW;
+	return newreg;
+}
+
+static struct rc_src_register swizzle(struct rc_src_register reg,
+		rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w)
+{
+	struct rc_src_register swizzled = reg;
+	swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w);
+	return swizzled;
+}
+
+static struct rc_src_register swizzle_smear(struct rc_src_register reg,
+		rc_swizzle x)
+{
+	return swizzle(reg, x, x, x, x);
+}
+
+static struct rc_src_register swizzle_xxxx(struct rc_src_register reg)
+{
+	return swizzle_smear(reg, RC_SWIZZLE_X);
+}
+
+static struct rc_src_register swizzle_yyyy(struct rc_src_register reg)
+{
+	return swizzle_smear(reg, RC_SWIZZLE_Y);
+}
+
+static struct rc_src_register swizzle_zzzz(struct rc_src_register reg)
+{
+	return swizzle_smear(reg, RC_SWIZZLE_Z);
+}
+
+static struct rc_src_register swizzle_wwww(struct rc_src_register reg)
+{
+	return swizzle_smear(reg, RC_SWIZZLE_W);
+}
+
+static int is_dst_safe_to_reuse(struct rc_instruction *inst)
+{
+	const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
+	unsigned i;
+
+	assert(info->HasDstReg);
+
+	if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY)
+		return 0;
+
+	for (i = 0; i < info->NumSrcRegs; i++) {
+		if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY &&
+		    inst->U.I.SrcReg[i].Index == inst->U.I.DstReg.Index)
+			return 0;
+	}
+
+	return 1;
+}
+
+static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c,
+					       struct rc_instruction *inst)
+{
+	unsigned tmp;
+
+	if (is_dst_safe_to_reuse(inst))
+		tmp = inst->U.I.DstReg.Index;
+	else
+		tmp = rc_find_free_temporary(c);
+
+	return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask);
+}
+
+static void transform_ABS(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_src_register src = inst->U.I.SrcReg[0];
+	src.Abs = 1;
+	src.Negate = RC_MASK_NONE;
+	emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src);
+	rc_remove_instruction(inst);
+}
+
+static void transform_CEIL(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* Assuming:
+	 *     ceil(x) = -floor(-x)
+	 *
+	 * After inlining floor:
+	 *     ceil(x) = -(-x-frac(-x))
+	 *
+	 * After simplification:
+	 *     ceil(x) = x+frac(-x)
+	 */
+
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0]));
+	emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index));
+	rc_remove_instruction(inst);
+}
+
+static void transform_CLAMP(struct radeon_compiler *c,
+	struct rc_instruction *inst)
+{
+	/* CLAMP dst, src, min, max
+	 *    into:
+	 * MIN tmp, src, max
+	 * MAX dst, tmp, min
+	 */
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+	emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst,
+		inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]);
+	emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]);
+	rc_remove_instruction(inst);
+}
+
+static void transform_DP2(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_src_register src0 = inst->U.I.SrcReg[0];
+	struct rc_src_register src1 = inst->U.I.SrcReg[1];
+	src0.Negate &= ~(RC_MASK_Z | RC_MASK_W);
+	src0.Swizzle &= ~(63 << (3 * 2));
+	src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
+	src1.Negate &= ~(RC_MASK_Z | RC_MASK_W);
+	src1.Swizzle &= ~(63 << (3 * 2));
+	src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
+	emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
+	rc_remove_instruction(inst);
+}
+
+static void transform_DPH(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_src_register src0 = inst->U.I.SrcReg[0];
+	src0.Negate &= ~RC_MASK_W;
+	src0.Swizzle &= ~(7 << (3 * 3));
+	src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3);
+	emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]);
+	rc_remove_instruction(inst);
+}
+
+/**
+ * [1, src0.y*src1.y, src0.z, src1.w]
+ * So basically MUL with lotsa swizzling.
+ */
+static void transform_DST(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE),
+		swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W));
+	rc_remove_instruction(inst);
+}
+
+static void transform_FLR(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]);
+	emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
+	rc_remove_instruction(inst);
+}
+
+/**
+ * Definition of LIT (from ARB_fragment_program):
+ *
+ *  tmp = VectorLoad(op0);
+ *  if (tmp.x < 0) tmp.x = 0;
+ *  if (tmp.y < 0) tmp.y = 0;
+ *  if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
+ *  else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
+ *  result.x = 1.0;
+ *  result.y = tmp.x;
+ *  result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
+ *  result.w = 1.0;
+ *
+ * The longest path of computation is the one leading to result.z,
+ * consisting of 5 operations. This implementation of LIT takes
+ * 5 slots, if the subsequent optimization passes are clever enough
+ * to pair instructions correctly.
+ */
+static void transform_LIT(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	unsigned int constant;
+	unsigned int constant_swizzle;
+	unsigned int temp;
+	struct rc_src_register srctemp;
+
+	constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle);
+
+	if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) {
+		struct rc_instruction * inst_mov;
+
+		inst_mov = emit1(c, inst,
+			RC_OPCODE_MOV, 0, inst->U.I.DstReg,
+			srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c)));
+
+		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
+		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+	}
+
+	temp = inst->U.I.DstReg.Index;
+	srctemp = srcreg(RC_FILE_TEMPORARY, temp);
+
+	/* tmp.x = max(0.0, Src.x); */
+	/* tmp.y = max(0.0, Src.y); */
+	/* tmp.w = clamp(Src.z, -128+eps, 128-eps); */
+	emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+		dstregtmpmask(temp, RC_MASK_XYW),
+		inst->U.I.SrcReg[0],
+		swizzle(srcreg(RC_FILE_CONSTANT, constant),
+			RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3));
+	emit2(c, inst->Prev, RC_OPCODE_MIN, 0,
+		dstregtmpmask(temp, RC_MASK_Z),
+		swizzle_wwww(srctemp),
+		negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)));
+
+	/* tmp.w = Pow(tmp.y, tmp.w) */
+	emit1(c, inst->Prev, RC_OPCODE_LG2, 0,
+		dstregtmpmask(temp, RC_MASK_W),
+		swizzle_yyyy(srctemp));
+	emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
+		dstregtmpmask(temp, RC_MASK_W),
+		swizzle_wwww(srctemp),
+		swizzle_zzzz(srctemp));
+	emit1(c, inst->Prev, RC_OPCODE_EX2, 0,
+		dstregtmpmask(temp, RC_MASK_W),
+		swizzle_wwww(srctemp));
+
+	/* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */
+	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode,
+		dstregtmpmask(temp, RC_MASK_Z),
+		negate(swizzle_xxxx(srctemp)),
+		swizzle_wwww(srctemp),
+		builtin_zero);
+
+	/* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */
+	emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode,
+		dstregtmpmask(temp, RC_MASK_XYW),
+		swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE));
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_LRP(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
+		dst,
+		inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2]));
+	emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode,
+		inst->U.I.DstReg,
+		inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]);
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_POW(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register tempdst = try_to_reuse_dst(c, inst);
+	struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index);
+	tempdst.WriteMask = RC_MASK_W;
+	tempsrc.Swizzle = RC_SWIZZLE_WWWW;
+
+	emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0]));
+	emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1]));
+	emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc);
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_RSQ(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]);
+}
+
+static void transform_SEQ(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one);
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_SFL(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero);
+	rc_remove_instruction(inst);
+}
+
+static void transform_SGE(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one);
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_SGT(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
+	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero);
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_SLE(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
+	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one);
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_SLT(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero);
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_SNE(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero);
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_SSG(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* result = sign(x)
+	 *
+	 *   CMP tmp0, -x, 1, 0
+	 *   CMP tmp1, x, 1, 0
+	 *   ADD result, tmp0, -tmp1;
+	 */
+	struct rc_dst_register dst0;
+	unsigned tmp1;
+
+	/* 0 < x */
+	dst0 = try_to_reuse_dst(c, inst);
+	emit3(c, inst->Prev, RC_OPCODE_CMP, 0,
+	      dst0,
+	      negate(inst->U.I.SrcReg[0]),
+	      builtin_one,
+	      builtin_zero);
+
+	/* x < 0 */
+	tmp1 = rc_find_free_temporary(c);
+	emit3(c, inst->Prev, RC_OPCODE_CMP, 0,
+	      dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask),
+	      inst->U.I.SrcReg[0],
+	      builtin_one,
+	      builtin_zero);
+
+	/* Either both are zero, or one of them is one and the other is zero. */
+	/* result = tmp0 - tmp1 */
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
+	      inst->U.I.DstReg,
+	      srcreg(RC_FILE_TEMPORARY, dst0.Index),
+	      negate(srcreg(RC_FILE_TEMPORARY, tmp1)));
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_SUB(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	inst->U.I.Opcode = RC_OPCODE_ADD;
+	inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]);
+}
+
+static void transform_SWZ(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	inst->U.I.Opcode = RC_OPCODE_MOV;
+}
+
+static void transform_XPD(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+
+	emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst,
+		swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
+		swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W));
+	emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W),
+		swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
+		negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
+
+	rc_remove_instruction(inst);
+}
+
+
+/**
+ * Can be used as a transformation for @ref radeonClauseLocalTransform,
+ * no userData necessary.
+ *
+ * Eliminates the following ALU instructions:
+ *  ABS, CEIL, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD
+ * using:
+ *  MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP
+ *
+ * Transforms RSQ to Radeon's native RSQ by explicitly setting
+ * absolute value.
+ *
+ * @note should be applicable to R300 and R500 fragment programs.
+ */
+int radeonTransformALU(
+	struct radeon_compiler * c,
+	struct rc_instruction* inst,
+	void* unused)
+{
+	switch(inst->U.I.Opcode) {
+	case RC_OPCODE_ABS: transform_ABS(c, inst); return 1;
+	case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
+	case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1;
+	case RC_OPCODE_DP2: transform_DP2(c, inst); return 1;
+	case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
+	case RC_OPCODE_DST: transform_DST(c, inst); return 1;
+	case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
+	case RC_OPCODE_LIT: transform_LIT(c, inst); return 1;
+	case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
+	case RC_OPCODE_POW: transform_POW(c, inst); return 1;
+	case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1;
+	case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1;
+	case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;
+	case RC_OPCODE_SGE: transform_SGE(c, inst); return 1;
+	case RC_OPCODE_SGT: transform_SGT(c, inst); return 1;
+	case RC_OPCODE_SLE: transform_SLE(c, inst); return 1;
+	case RC_OPCODE_SLT: transform_SLT(c, inst); return 1;
+	case RC_OPCODE_SNE: transform_SNE(c, inst); return 1;
+	case RC_OPCODE_SSG: transform_SSG(c, inst); return 1;
+	case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
+	case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
+	case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
+	default:
+		return 0;
+	}
+}
+
+
+static void transform_r300_vertex_ABS(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* Note: r500 can take absolute values, but r300 cannot. */
+	inst->U.I.Opcode = RC_OPCODE_MAX;
+	inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0];
+	inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+}
+
+static void transform_r300_vertex_CMP(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* There is no decent CMP available, so let's rig one up.
+	 * CMP is defined as dst = src0 < 0.0 ? src1 : src2
+	 * The following sequence consumes zero to two temps and two extra slots
+	 * (the second temp and the second slot is consumed by transform_LRP),
+	 * but should be equivalent:
+	 *
+	 * SLT tmp0, src0, 0.0
+	 * LRP dst, tmp0, src1, src2
+	 *
+	 * Yes, I know, I'm a mad scientist. ~ C. & M. */
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+
+	/* SLT tmp0, src0, 0.0 */
+	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+		dst,
+		inst->U.I.SrcReg[0], builtin_zero);
+
+	/* LRP dst, tmp0, src1, src2 */
+	transform_LRP(c,
+		emit3(c, inst->Prev, RC_OPCODE_LRP, 0,
+		      inst->U.I.DstReg,
+		      srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1],  inst->U.I.SrcReg[2]));
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_r300_vertex_DP2(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_instruction *next_inst = inst->Next;
+	transform_DP2(c, inst);
+	next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4;
+}
+
+static void transform_r300_vertex_DP3(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_src_register src0 = inst->U.I.SrcReg[0];
+	struct rc_src_register src1 = inst->U.I.SrcReg[1];
+	src0.Negate &= ~RC_MASK_W;
+	src0.Swizzle &= ~(7 << (3 * 3));
+	src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
+	src1.Negate &= ~RC_MASK_W;
+	src1.Swizzle &= ~(7 << (3 * 3));
+	src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
+	emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
+	rc_remove_instruction(inst);
+}
+
+static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+	unsigned constant_swizzle;
+	int constant = rc_constants_add_immediate_scalar(&c->Program.Constants,
+							 0.0000000000000000001,
+							 &constant_swizzle);
+
+	/* MOV dst, src */
+	dst.WriteMask = RC_MASK_XYZW;
+	emit1(c, inst->Prev, RC_OPCODE_MOV, 0,
+		dst,
+		inst->U.I.SrcReg[0]);
+
+	/* MAX dst.y, src, 0.00...001 */
+	emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+		dstregtmpmask(dst.Index, RC_MASK_Y),
+		srcreg(RC_FILE_TEMPORARY, dst.Index),
+		srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
+
+	inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index);
+}
+
+static void transform_r300_vertex_SEQ(struct radeon_compiler *c,
+	struct rc_instruction *inst)
+{
+	/* x = y  <==>  x >= y && y >= x */
+	int tmp = rc_find_free_temporary(c);
+
+	/* x <= y */
+	emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
+	      dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
+	      inst->U.I.SrcReg[0],
+	      inst->U.I.SrcReg[1]);
+
+	/* y <= x */
+	emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
+	      inst->U.I.DstReg,
+	      inst->U.I.SrcReg[1],
+	      inst->U.I.SrcReg[0]);
+
+	/* x && y  =  x * y */
+	emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
+	      inst->U.I.DstReg,
+	      srcreg(RC_FILE_TEMPORARY, tmp),
+	      srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index));
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_r300_vertex_SNE(struct radeon_compiler *c,
+	struct rc_instruction *inst)
+{
+	/* x != y  <==>  x < y || y < x */
+	int tmp = rc_find_free_temporary(c);
+
+	/* x < y */
+	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+	      dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
+	      inst->U.I.SrcReg[0],
+	      inst->U.I.SrcReg[1]);
+
+	/* y < x */
+	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+	      inst->U.I.DstReg,
+	      inst->U.I.SrcReg[1],
+	      inst->U.I.SrcReg[0]);
+
+	/* x || y  =  max(x, y) */
+	emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+	      inst->U.I.DstReg,
+	      srcreg(RC_FILE_TEMPORARY, tmp),
+	      srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index));
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_r300_vertex_SGT(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* x > y  <==>  -x < -y */
+	inst->U.I.Opcode = RC_OPCODE_SLT;
+	inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+	inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+}
+
+static void transform_r300_vertex_SLE(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* x <= y  <==>  -x >= -y */
+	inst->U.I.Opcode = RC_OPCODE_SGE;
+	inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+	inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+}
+
+static void transform_r300_vertex_SSG(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* result = sign(x)
+	 *
+	 *   SLT tmp0, 0, x;
+	 *   SLT tmp1, x, 0;
+	 *   ADD result, tmp0, -tmp1;
+	 */
+	struct rc_dst_register dst0 = try_to_reuse_dst(c, inst);
+	unsigned tmp1;
+
+	/* 0 < x */
+	dst0 = try_to_reuse_dst(c, inst);
+	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+	      dst0,
+	      builtin_zero,
+	      inst->U.I.SrcReg[0]);
+
+	/* x < 0 */
+	tmp1 = rc_find_free_temporary(c);
+	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+	      dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask),
+	      inst->U.I.SrcReg[0],
+	      builtin_zero);
+
+	/* Either both are zero, or one of them is one and the other is zero. */
+	/* result = tmp0 - tmp1 */
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
+	      inst->U.I.DstReg,
+	      srcreg(RC_FILE_TEMPORARY, dst0.Index),
+	      negate(srcreg(RC_FILE_TEMPORARY, tmp1)));
+
+	rc_remove_instruction(inst);
+}
+
+/**
+ * For use with rc_local_transform, this transforms non-native ALU
+ * instructions of the r300 up to r500 vertex engine.
+ */
+int r300_transform_vertex_alu(
+	struct radeon_compiler * c,
+	struct rc_instruction* inst,
+	void* unused)
+{
+	switch(inst->U.I.Opcode) {
+	case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1;
+	case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
+	case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1;
+	case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1;
+	case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1;
+	case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1;
+	case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
+	case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
+	case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1;
+	case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
+	case RC_OPCODE_SEQ:
+		if (!c->is_r500) {
+			transform_r300_vertex_SEQ(c, inst);
+			return 1;
+		}
+		return 0;
+	case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;
+	case RC_OPCODE_SGT: transform_r300_vertex_SGT(c, inst); return 1;
+	case RC_OPCODE_SLE: transform_r300_vertex_SLE(c, inst); return 1;
+	case RC_OPCODE_SNE:
+		if (!c->is_r500) {
+			transform_r300_vertex_SNE(c, inst);
+			return 1;
+		}
+		return 0;
+	case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); return 1;
+	case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
+	case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
+	case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
+	default:
+		return 0;
+	}
+}
+
+static void sincos_constants(struct radeon_compiler* c, unsigned int *constants)
+{
+	static const float SinCosConsts[2][4] = {
+		{
+			1.273239545,		/* 4/PI */
+			-0.405284735,		/* -4/(PI*PI) */
+			3.141592654,		/* PI */
+			0.2225			/* weight */
+		},
+		{
+			0.75,
+			0.5,
+			0.159154943,		/* 1/(2*PI) */
+			6.283185307		/* 2*PI */
+		}
+	};
+	int i;
+
+	for(i = 0; i < 2; ++i)
+		constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]);
+}
+
+/**
+ * Approximate sin(x), where x is clamped to (-pi/2, pi/2).
+ *
+ * MUL tmp.xy, src, { 4/PI, -4/(PI^2) }
+ * MAD tmp.x, tmp.y, |src|, tmp.x
+ * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
+ * MAD dest, tmp.y, weight, tmp.x
+ */
+static void sin_approx(
+	struct radeon_compiler* c, struct rc_instruction * inst,
+	struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants)
+{
+	unsigned int tempreg = rc_find_free_temporary(c);
+
+	emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+		swizzle_xxxx(src),
+		srcreg(RC_FILE_CONSTANT, constants[0]));
+	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X),
+		swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
+		absolute(swizzle_xxxx(src)),
+		swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)));
+	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y),
+		swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)),
+		absolute(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))),
+		negate(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))));
+	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst,
+		swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
+		swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[0])),
+		swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)));
+}
+
+/**
+ * Translate the trigonometric functions COS, SIN, and SCS
+ * using only the basic instructions
+ *  MOV, ADD, MUL, MAD, FRC
+ */
+int r300_transform_trig_simple(struct radeon_compiler* c,
+	struct rc_instruction* inst,
+	void* unused)
+{
+	unsigned int constants[2];
+	unsigned int tempreg;
+
+	if (inst->U.I.Opcode != RC_OPCODE_COS &&
+	    inst->U.I.Opcode != RC_OPCODE_SIN &&
+	    inst->U.I.Opcode != RC_OPCODE_SCS)
+		return 0;
+
+	tempreg = rc_find_free_temporary(c);
+
+	sincos_constants(c, constants);
+
+	if (inst->U.I.Opcode == RC_OPCODE_COS) {
+		/* MAD tmp.x, src, 1/(2*PI), 0.75 */
+		/* FRC tmp.x, tmp.x */
+		/* MAD tmp.z, tmp.x, 2*PI, -PI */
+		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+			swizzle_xxxx(inst->U.I.SrcReg[0]),
+			swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
+			swizzle_xxxx(srcreg(RC_FILE_CONSTANT, constants[1])));
+		emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
+			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)));
+		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
+			swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
+			negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
+
+		sin_approx(c, inst, inst->U.I.DstReg,
+			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
+			constants);
+	} else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
+		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+			swizzle_xxxx(inst->U.I.SrcReg[0]),
+			swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
+			swizzle_yyyy(srcreg(RC_FILE_CONSTANT, constants[1])));
+		emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
+			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)));
+		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
+			swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
+			negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
+
+		sin_approx(c, inst, inst->U.I.DstReg,
+			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
+			constants);
+	} else {
+		struct rc_dst_register dst;
+
+		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+			swizzle_xxxx(inst->U.I.SrcReg[0]),
+			swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
+			swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W));
+		emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+			srcreg(RC_FILE_TEMPORARY, tempreg));
+		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+			srcreg(RC_FILE_TEMPORARY, tempreg),
+			swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
+			negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
+
+		dst = inst->U.I.DstReg;
+
+		dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X;
+		sin_approx(c, inst, dst,
+			swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)),
+			constants);
+
+		dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y;
+		sin_approx(c, inst, dst,
+			swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
+			constants);
+	}
+
+	rc_remove_instruction(inst);
+
+	return 1;
+}
+
+static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c,
+	struct rc_instruction *inst,
+	unsigned srctmp)
+{
+	if (inst->U.I.Opcode == RC_OPCODE_COS) {
+		emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg,
+			srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+	} else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
+		emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode,
+			inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+	} else if (inst->U.I.Opcode == RC_OPCODE_SCS) {
+		struct rc_dst_register moddst = inst->U.I.DstReg;
+
+		if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
+			moddst.WriteMask = RC_MASK_X;
+			emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst,
+				srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+		}
+		if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
+			moddst.WriteMask = RC_MASK_Y;
+			emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst,
+				srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+		}
+	}
+
+	rc_remove_instruction(inst);
+}
+
+
+/**
+ * Transform the trigonometric functions COS, SIN, and SCS
+ * to include pre-scaling by 1/(2*PI) and taking the fractional
+ * part, so that the input to COS and SIN is always in the range [0,1).
+ * SCS is replaced by one COS and one SIN instruction.
+ *
+ * @warning This transformation implicitly changes the semantics of SIN and COS!
+ */
+int radeonTransformTrigScale(struct radeon_compiler* c,
+	struct rc_instruction* inst,
+	void* unused)
+{
+	static const float RCP_2PI = 0.15915494309189535;
+	unsigned int temp;
+	unsigned int constant;
+	unsigned int constant_swizzle;
+
+	if (inst->U.I.Opcode != RC_OPCODE_COS &&
+	    inst->U.I.Opcode != RC_OPCODE_SIN &&
+	    inst->U.I.Opcode != RC_OPCODE_SCS)
+		return 0;
+
+	temp = rc_find_free_temporary(c);
+	constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle);
+
+	emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W),
+		swizzle_xxxx(inst->U.I.SrcReg[0]),
+		srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
+	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
+		srcreg(RC_FILE_TEMPORARY, temp));
+
+	r300_transform_SIN_COS_SCS(c, inst, temp);
+	return 1;
+}
+
+/**
+ * Transform the trigonometric functions COS, SIN, and SCS
+ * so that the input to COS and SIN is always in the range [-PI, PI].
+ * SCS is replaced by one COS and one SIN instruction.
+ */
+int r300_transform_trig_scale_vertex(struct radeon_compiler *c,
+	struct rc_instruction *inst,
+	void *unused)
+{
+	static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979};
+	unsigned int temp;
+	unsigned int constant;
+
+	if (inst->U.I.Opcode != RC_OPCODE_COS &&
+	    inst->U.I.Opcode != RC_OPCODE_SIN &&
+	    inst->U.I.Opcode != RC_OPCODE_SCS)
+		return 0;
+
+	/* Repeat x in the range [-PI, PI]:
+	 *
+	 *   repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI
+	 */
+
+	temp = rc_find_free_temporary(c);
+	constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons);
+
+	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W),
+		swizzle_xxxx(inst->U.I.SrcReg[0]),
+		srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_XXXX),
+		srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_YYYY));
+	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
+		srcreg(RC_FILE_TEMPORARY, temp));
+	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W),
+		srcreg(RC_FILE_TEMPORARY, temp),
+		srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_ZZZZ),
+		srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_WWWW));
+
+	r300_transform_SIN_COS_SCS(c, inst, temp);
+	return 1;
+}
+
+/**
+ * Rewrite DDX/DDY instructions to properly work with r5xx shaders.
+ * The r5xx MDH/MDV instruction provides per-quad partial derivatives.
+ * It takes the form A*B+C. A and C are set by setting src0. B should be -1.
+ *
+ * @warning This explicitly changes the form of DDX and DDY!
+ */
+
+int radeonTransformDeriv(struct radeon_compiler* c,
+	struct rc_instruction* inst,
+	void* unused)
+{
+	if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY)
+		return 0;
+
+	inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111;
+	inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
+
+	return 1;
+}
+
+/**
+ * IF Temp[0].x -\
+ * KILP         - > KIL -abs(Temp[0].x)
+ * ENDIF        -/
+ *
+ * This needs to be done in its own pass, because it modifies the instructions
+ * before and after KILP.
+ */
+void rc_transform_KILP(struct radeon_compiler * c, void *user)
+{
+	struct rc_instruction * inst;
+	for (inst = c->Program.Instructions.Next;
+			inst != &c->Program.Instructions; inst = inst->Next) {
+
+		if (inst->U.I.Opcode != RC_OPCODE_KILP)
+			continue;
+
+		inst->U.I.Opcode = RC_OPCODE_KIL;
+
+		if (inst->Prev->U.I.Opcode != RC_OPCODE_IF
+				|| inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) {
+			inst->U.I.SrcReg[0] = negate(builtin_one);
+		} else {
+
+			inst->U.I.SrcReg[0] =
+				negate(absolute(inst->Prev->U.I.SrcReg[0]));
+			/* Remove IF */
+			rc_remove_instruction(inst->Prev);
+			/* Remove ENDIF */
+			rc_remove_instruction(inst->Next);
+		}
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.h b/src/gallium/drivers/r300/compiler/radeon_program_alu.h
new file mode 100644
index 00000000000..b5f361e624f
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_ALU_H_
+#define __RADEON_PROGRAM_ALU_H_
+
+#include "radeon_program.h"
+
+int radeonTransformALU(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void*);
+
+int r300_transform_vertex_alu(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void*);
+
+int r300_transform_trig_simple(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void*);
+
+int radeonTransformTrigScale(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void*);
+
+int r300_transform_trig_scale_vertex(
+	struct radeon_compiler *c,
+	struct rc_instruction *inst,
+	void*);
+
+int radeonTransformDeriv(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void*);
+
+void rc_transform_KILP(struct radeon_compiler * c,
+		       void *user);
+
+#endif /* __RADEON_PROGRAM_ALU_H_ */
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_constants.h b/src/gallium/drivers/r300/compiler/radeon_program_constants.h
new file mode 100644
index 00000000000..24577333450
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program_constants.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_PROGRAM_CONSTANTS_H
+#define RADEON_PROGRAM_CONSTANTS_H
+
+typedef enum {
+	RC_SATURATE_NONE = 0,
+	RC_SATURATE_ZERO_ONE,
+	RC_SATURATE_MINUS_PLUS_ONE
+} rc_saturate_mode;
+
+typedef enum {
+	RC_TEXTURE_2D_ARRAY,
+	RC_TEXTURE_1D_ARRAY,
+	RC_TEXTURE_CUBE,
+	RC_TEXTURE_3D,
+	RC_TEXTURE_RECT,
+	RC_TEXTURE_2D,
+	RC_TEXTURE_1D
+} rc_texture_target;
+
+typedef enum {
+	/**
+	 * Used to indicate unused register descriptions and
+	 * source register that use a constant swizzle.
+	 */
+	RC_FILE_NONE = 0,
+	RC_FILE_TEMPORARY,
+
+	/**
+	 * Input register.
+	 *
+	 * \note The compiler attaches no implicit semantics to input registers.
+	 * Fragment/vertex program specific semantics must be defined explicitly
+	 * using the appropriate compiler interfaces.
+	 */
+	RC_FILE_INPUT,
+
+	/**
+	 * Output register.
+	 *
+	 * \note The compiler attaches no implicit semantics to input registers.
+	 * Fragment/vertex program specific semantics must be defined explicitly
+	 * using the appropriate compiler interfaces.
+	 */
+	RC_FILE_OUTPUT,
+	RC_FILE_ADDRESS,
+
+	/**
+	 * Indicates a constant from the \ref rc_constant_list .
+	 */
+	RC_FILE_CONSTANT,
+
+	/**
+	 * Indicates a special register, see RC_SPECIAL_xxx.
+	 */
+	RC_FILE_SPECIAL,
+
+	/**
+	 * Indicates this register should use the result of the presubtract
+	 * operation.
+	 */
+	RC_FILE_PRESUB
+} rc_register_file;
+
+enum {
+	/** R500 fragment program ALU result "register" */
+	RC_SPECIAL_ALU_RESULT = 0,
+
+	/** Must be last */
+	RC_NUM_SPECIAL_REGISTERS
+};
+
+#define RC_REGISTER_INDEX_BITS 10
+#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS)
+
+typedef enum {
+	RC_SWIZZLE_X = 0,
+	RC_SWIZZLE_Y,
+	RC_SWIZZLE_Z,
+	RC_SWIZZLE_W,
+	RC_SWIZZLE_ZERO,
+	RC_SWIZZLE_ONE,
+	RC_SWIZZLE_HALF,
+	RC_SWIZZLE_UNUSED
+} rc_swizzle;
+
+#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9))
+#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a))
+#define GET_SWZ(swz, idx)      (((swz) >> ((idx)*3)) & 0x7)
+#define GET_BIT(msk, idx)      (((msk) >> (idx)) & 0x1)
+#define SET_SWZ(swz, idx, newv) \
+	do { \
+		(swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \
+	} while(0)
+
+#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)
+#define RC_SWIZZLE_XYZ0 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO)
+#define RC_SWIZZLE_XYZZ RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z)
+#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X)
+#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y)
+#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z)
+#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W)
+#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO)
+#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE)
+#define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF)
+#define RC_SWIZZLE_UUUU RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED)
+
+/**
+ * \name Bitmasks for components of vectors.
+ *
+ * Used for write masks, negation masks, etc.
+ */
+/*@{*/
+#define RC_MASK_NONE 0
+#define RC_MASK_X 1
+#define RC_MASK_Y 2
+#define RC_MASK_Z 4
+#define RC_MASK_W 8
+#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y)
+#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z)
+#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W)
+#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W)
+/*@}*/
+
+typedef enum {
+	RC_ALURESULT_NONE = 0,
+	RC_ALURESULT_X,
+	RC_ALURESULT_W
+} rc_write_aluresult;
+
+typedef enum {
+	RC_PRESUB_NONE = 0,
+
+	/** 1 - 2 * src0 */
+	RC_PRESUB_BIAS,
+
+	/** src1 - src0 */
+	RC_PRESUB_SUB,
+
+	/** src1 + src0 */
+	RC_PRESUB_ADD,
+
+	/** 1 - src0 */
+	RC_PRESUB_INV
+} rc_presubtract_op;
+
+static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){
+	switch(op){
+	case RC_PRESUB_BIAS:
+	case RC_PRESUB_INV:
+		return 1;
+	case RC_PRESUB_ADD:
+	case RC_PRESUB_SUB:
+		return 2;
+	default:
+		return 0;
+	}
+}
+
+#define RC_SOURCE_NONE  0x0
+#define RC_SOURCE_RGB   0x1
+#define RC_SOURCE_ALPHA 0x2
+
+#endif /* RADEON_PROGRAM_CONSTANTS_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_pair.c b/src/gallium/drivers/r300/compiler/radeon_program_pair.c
new file mode 100644
index 00000000000..52315957520
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program_pair.c
@@ -0,0 +1,239 @@
+/*
+ * Copyright (C) 2008-2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include "radeon_compiler_util.h"
+
+#include <stdlib.h>
+
+/**
+ * Return the source slot where we installed the given register access,
+ * or -1 if no slot was free anymore.
+ */
+int rc_pair_alloc_source(struct rc_pair_instruction *pair,
+	unsigned int rgb, unsigned int alpha,
+	rc_register_file file, unsigned int index)
+{
+	int candidate = -1;
+	int candidate_quality = -1;
+	unsigned int alpha_used = 0;
+	unsigned int rgb_used = 0;
+	int i;
+
+	if ((!rgb && !alpha) || file == RC_FILE_NONE)
+		return 0;
+
+	/* Make sure only one presubtract operation is used per instruction. */
+	if (file == RC_FILE_PRESUB) {
+		if (rgb && pair->RGB.Src[RC_PAIR_PRESUB_SRC].Used
+			&& index != pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
+				return -1;
+		}
+
+		if (alpha && pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Used
+			&& index != pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
+				return -1;
+		}
+	}
+
+	for(i = 0; i < 3; ++i) {
+		int q = 0;
+		if (rgb) {
+			if (pair->RGB.Src[i].Used) {
+				if (pair->RGB.Src[i].File != file ||
+				    pair->RGB.Src[i].Index != index) {
+					rgb_used++;
+					continue;
+				}
+				q++;
+			}
+		}
+		if (alpha) {
+			if (pair->Alpha.Src[i].Used) {
+				if (pair->Alpha.Src[i].File != file ||
+				    pair->Alpha.Src[i].Index != index) {
+					alpha_used++;
+					continue;
+				}
+				q++;
+			}
+		}
+		if (q > candidate_quality) {
+			candidate_quality = q;
+			candidate = i;
+		}
+	}
+
+	if (file == RC_FILE_PRESUB) {
+		candidate = RC_PAIR_PRESUB_SRC;
+	} else if (candidate < 0 || (rgb && rgb_used > 2)
+			|| (alpha && alpha_used > 2)) {
+		return -1;
+	}
+
+	/* candidate >= 0 */
+
+	if (rgb) {
+		pair->RGB.Src[candidate].Used = 1;
+		pair->RGB.Src[candidate].File = file;
+		pair->RGB.Src[candidate].Index = index;
+		if (candidate == RC_PAIR_PRESUB_SRC) {
+			/* For registers with the RC_FILE_PRESUB file,
+			 * the index stores the presubtract op. */
+			int src_regs = rc_presubtract_src_reg_count(index);
+			for(i = 0; i < src_regs; i++) {
+				pair->RGB.Src[i].Used = 1;
+			}
+		}
+	}
+	if (alpha) {
+		pair->Alpha.Src[candidate].Used = 1;
+		pair->Alpha.Src[candidate].File = file;
+		pair->Alpha.Src[candidate].Index = index;
+		if (candidate == RC_PAIR_PRESUB_SRC) {
+			/* For registers with the RC_FILE_PRESUB file,
+			 * the index stores the presubtract op. */
+			int src_regs = rc_presubtract_src_reg_count(index);
+			for(i=0; i < src_regs; i++) {
+				pair->Alpha.Src[i].Used = 1;
+			}
+		}
+	}
+
+	return candidate;
+}
+
+static void pair_foreach_source_callback(
+	struct rc_pair_instruction * pair,
+	void * data,
+	rc_pair_foreach_src_fn cb,
+	unsigned int swz,
+	unsigned int src)
+{
+	/* swz > 3 means that the swizzle is either not used, or a constant
+	 * swizzle (e.g. 0, 1, 0.5). */
+	if(swz > 3)
+		return;
+
+	if(swz == RC_SWIZZLE_W) {
+		if (src == RC_PAIR_PRESUB_SRC) {
+			unsigned int i;
+			unsigned int src_count = rc_presubtract_src_reg_count(
+				pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
+			for(i = 0; i < src_count; i++) {
+				cb(data, &pair->Alpha.Src[i]);
+			}
+		} else {
+			cb(data, &pair->Alpha.Src[src]);
+		}
+	} else {
+		if (src == RC_PAIR_PRESUB_SRC) {
+			unsigned int i;
+			unsigned int src_count = rc_presubtract_src_reg_count(
+				pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index);
+			for(i = 0; i < src_count; i++) {
+				cb(data, &pair->RGB.Src[i]);
+			}
+		}
+		else {
+			cb(data, &pair->RGB.Src[src]);
+		}
+	}
+}
+
+void rc_pair_foreach_source_that_alpha_reads(
+	struct rc_pair_instruction * pair,
+	void * data,
+	rc_pair_foreach_src_fn cb)
+{
+	unsigned int i;
+	const struct rc_opcode_info * info =
+				rc_get_opcode_info(pair->Alpha.Opcode);
+	for(i = 0; i < info->NumSrcRegs; i++) {
+		pair_foreach_source_callback(pair, data, cb,
+					GET_SWZ(pair->Alpha.Arg[i].Swizzle, 0),
+					pair->Alpha.Arg[i].Source);
+	}
+}
+
+void rc_pair_foreach_source_that_rgb_reads(
+	struct rc_pair_instruction * pair,
+	void * data,
+	rc_pair_foreach_src_fn cb)
+{
+	unsigned int i;
+	const struct rc_opcode_info * info =
+				rc_get_opcode_info(pair->RGB.Opcode);
+	for(i = 0; i < info->NumSrcRegs; i++) {
+		unsigned int chan;
+		unsigned int swz = RC_SWIZZLE_UNUSED;
+		/* Find a swizzle that is either X,Y,Z,or W.  We assume here
+		 * that if one channel swizzles X,Y, or Z, then none of the
+		 * other channels swizzle W, and vice-versa. */
+		for(chan = 0; chan < 4; chan++) {
+			swz = GET_SWZ(pair->RGB.Arg[i].Swizzle, chan);
+			if(swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
+			|| swz == RC_SWIZZLE_Z || swz == RC_SWIZZLE_W)
+				continue;
+		}
+		pair_foreach_source_callback(pair, data, cb,
+					swz,
+					pair->RGB.Arg[i].Source);
+	}
+}
+
+struct rc_pair_instruction_source * rc_pair_get_src(
+	struct rc_pair_instruction * pair_inst,
+	struct rc_pair_instruction_arg * arg)
+{
+	unsigned int type;
+
+	type = rc_source_type_swz(arg->Swizzle);
+
+	if (type & RC_SOURCE_RGB) {
+		return &pair_inst->RGB.Src[arg->Source];
+	} else if (type & RC_SOURCE_ALPHA) {
+		return &pair_inst->Alpha.Src[arg->Source];
+	} else {
+		return NULL;
+	}
+}
+
+int rc_pair_get_src_index(
+	struct rc_pair_instruction * pair_inst,
+	struct rc_pair_instruction_source * src)
+{
+	int i;
+	for (i = 0; i < 3; i++) {
+		if (&pair_inst->RGB.Src[i] == src
+			|| &pair_inst->Alpha.Src[i] == src) {
+			return i;
+		}
+	}
+	return -1;
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_pair.h b/src/gallium/drivers/r300/compiler/radeon_program_pair.h
new file mode 100644
index 00000000000..a957ea9f7a0
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program_pair.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_PAIR_H_
+#define __RADEON_PROGRAM_PAIR_H_
+
+#include "radeon_code.h"
+#include "radeon_opcodes.h"
+#include "radeon_program_constants.h"
+
+struct radeon_compiler;
+
+
+/**
+ * \file
+ * Represents a paired ALU instruction, as found in R300 and R500
+ * fragment programs.
+ *
+ * Note that this representation is taking some liberties as far
+ * as register files are concerned, to allow separate register
+ * allocation.
+ *
+ * Also note that there are some subtleties in that the semantics
+ * of certain opcodes are implicitly changed in this representation;
+ * see \ref rc_pair_translate
+ */
+
+/* For rgb and alpha instructions when arg[n].Source = RC_PAIR_PRESUB_SRC, then
+ * the presubtract value will be used, and
+ * {RGB,Alpha}.Src[RC_PAIR_PRESUB_SRC].File will be set to RC_FILE_PRESUB.
+ */
+#define RC_PAIR_PRESUB_SRC 3
+
+struct rc_pair_instruction_source {
+	unsigned int Used:1;
+	unsigned int File:3;
+	unsigned int Index:RC_REGISTER_INDEX_BITS;
+};
+
+struct rc_pair_instruction_arg {
+	unsigned int Source:2;
+	unsigned int Swizzle:12;
+	unsigned int Abs:1;
+	unsigned int Negate:1;
+};
+
+struct rc_pair_sub_instruction {
+	unsigned int Opcode:8;
+	unsigned int DestIndex:RC_REGISTER_INDEX_BITS;
+	unsigned int WriteMask:4;
+	unsigned int Target:2;
+	unsigned int OutputWriteMask:3;
+	unsigned int DepthWriteMask:1;
+	unsigned int Saturate:1;
+
+	struct rc_pair_instruction_source Src[4];
+	struct rc_pair_instruction_arg Arg[3];
+};
+
+struct rc_pair_instruction {
+	struct rc_pair_sub_instruction RGB;
+	struct rc_pair_sub_instruction Alpha;
+
+	unsigned int WriteALUResult:2;
+	unsigned int ALUResultCompare:3;
+	unsigned int Nop:1;
+};
+
+typedef void (*rc_pair_foreach_src_fn)
+			(void *, struct rc_pair_instruction_source *);
+
+/**
+ * General helper functions for dealing with the paired instruction format.
+ */
+/*@{*/
+int rc_pair_alloc_source(struct rc_pair_instruction *pair,
+	unsigned int rgb, unsigned int alpha,
+	rc_register_file file, unsigned int index);
+
+void rc_pair_foreach_source_that_alpha_reads(
+	struct rc_pair_instruction * pair,
+	void * data,
+	rc_pair_foreach_src_fn cb);
+
+void rc_pair_foreach_source_that_rgb_reads(
+	struct rc_pair_instruction * pair,
+	void * data,
+	rc_pair_foreach_src_fn cb);
+
+struct rc_pair_instruction_source * rc_pair_get_src(
+	struct rc_pair_instruction * pair_inst,
+	struct rc_pair_instruction_arg * arg);
+
+int rc_pair_get_src_index(
+	struct rc_pair_instruction * pair_inst,
+	struct rc_pair_instruction_source * src);
+/*@}*/
+
+
+/**
+ * Compiler passes that operate with the paired format.
+ */
+/*@{*/
+struct radeon_pair_handler;
+
+void rc_pair_translate(struct radeon_compiler *cc, void *user);
+void rc_pair_schedule(struct radeon_compiler *cc, void *user);
+void rc_pair_regalloc(struct radeon_compiler *cc, void *user);
+void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user);
+void rc_pair_remove_dead_sources(struct radeon_compiler *c, void *user);
+/*@}*/
+
+#endif /* __RADEON_PROGRAM_PAIR_H_ */
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_print.c b/src/gallium/drivers/r300/compiler/radeon_program_print.c
new file mode 100644
index 00000000000..390d1319460
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program_print.c
@@ -0,0 +1,418 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_program.h"
+
+#include <stdio.h>
+
+static const char * textarget_to_string(rc_texture_target target)
+{
+	switch(target) {
+	case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY";
+	case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY";
+	case RC_TEXTURE_CUBE: return "CUBE";
+	case RC_TEXTURE_3D: return "3D";
+	case RC_TEXTURE_RECT: return "RECT";
+	case RC_TEXTURE_2D: return "2D";
+	case RC_TEXTURE_1D: return "1D";
+	default: return "BAD_TEXTURE_TARGET";
+	}
+}
+
+static const char * presubtract_op_to_string(rc_presubtract_op op)
+{
+	switch(op) {
+	case RC_PRESUB_NONE:
+		return "NONE";
+	case RC_PRESUB_BIAS:
+		return "(1 - 2 * src0)";
+	case RC_PRESUB_SUB:
+		return "(src1 - src0)";
+	case RC_PRESUB_ADD:
+		return "(src1 + src0)";
+	case RC_PRESUB_INV:
+		return "(1 - src0)";
+	default:
+		return "BAD_PRESUBTRACT_OP";
+	}
+}
+
+static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs)
+{
+	if (func == RC_COMPARE_FUNC_NEVER) {
+		fprintf(f, "false");
+	} else if (func == RC_COMPARE_FUNC_ALWAYS) {
+		fprintf(f, "true");
+	} else {
+		const char * op;
+		switch(func) {
+		case RC_COMPARE_FUNC_LESS: op = "<"; break;
+		case RC_COMPARE_FUNC_EQUAL: op = "=="; break;
+		case RC_COMPARE_FUNC_LEQUAL: op = "<="; break;
+		case RC_COMPARE_FUNC_GREATER: op = ">"; break;
+		case RC_COMPARE_FUNC_NOTEQUAL: op = "!="; break;
+		case RC_COMPARE_FUNC_GEQUAL: op = ">="; break;
+		default: op = "???"; break;
+		}
+		fprintf(f, "%s %s %s", lhs, op, rhs);
+	}
+}
+
+static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
+{
+	if (file == RC_FILE_NONE) {
+		fprintf(f, "none");
+	} else if (file == RC_FILE_SPECIAL) {
+		switch(index) {
+		case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break;
+		default: fprintf(f, "special[%i]", index); break;
+		}
+	} else {
+		const char * filename;
+		switch(file) {
+		case RC_FILE_TEMPORARY: filename = "temp"; break;
+		case RC_FILE_INPUT: filename = "input"; break;
+		case RC_FILE_OUTPUT: filename = "output"; break;
+		case RC_FILE_ADDRESS: filename = "addr"; break;
+		case RC_FILE_CONSTANT: filename = "const"; break;
+		default: filename = "BAD FILE"; break;
+		}
+		fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : "");
+	}
+}
+
+static void rc_print_mask(FILE * f, unsigned int mask)
+{
+	if (mask & RC_MASK_X) fprintf(f, "x");
+	if (mask & RC_MASK_Y) fprintf(f, "y");
+	if (mask & RC_MASK_Z) fprintf(f, "z");
+	if (mask & RC_MASK_W) fprintf(f, "w");
+}
+
+static void rc_print_dst_register(FILE * f, struct rc_dst_register dst)
+{
+	rc_print_register(f, dst.File, dst.Index, 0);
+	if (dst.WriteMask != RC_MASK_XYZW) {
+		fprintf(f, ".");
+		rc_print_mask(f, dst.WriteMask);
+	}
+}
+
+static char rc_swizzle_char(unsigned int swz)
+{
+	switch(swz) {
+	case RC_SWIZZLE_X: return 'x';
+	case RC_SWIZZLE_Y: return 'y';
+	case RC_SWIZZLE_Z: return 'z';
+	case RC_SWIZZLE_W: return 'w';
+	case RC_SWIZZLE_ZERO: return '0';
+	case RC_SWIZZLE_ONE: return '1';
+	case RC_SWIZZLE_HALF: return 'H';
+	case RC_SWIZZLE_UNUSED: return '_';
+	}
+	fprintf(stderr, "bad swz: %u\n", swz);
+	return '?';
+}
+
+static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate)
+{
+	unsigned int comp;
+	for(comp = 0; comp < 4; ++comp) {
+		rc_swizzle swz = GET_SWZ(swizzle, comp);
+		if (GET_BIT(negate, comp))
+			fprintf(f, "-");
+		fprintf(f, "%c", rc_swizzle_char(swz));
+	}
+}
+
+static void rc_print_presub_instruction(FILE * f,
+					struct rc_presub_instruction inst)
+{
+	fprintf(f,"(");
+	switch(inst.Opcode){
+	case RC_PRESUB_BIAS:
+		fprintf(f, "1 - 2 * ");
+		rc_print_register(f, inst.SrcReg[0].File,
+				inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
+		break;
+	case RC_PRESUB_SUB:
+		rc_print_register(f, inst.SrcReg[1].File,
+				inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr);
+		fprintf(f, " - ");
+		rc_print_register(f, inst.SrcReg[0].File,
+				inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
+		break;
+	case RC_PRESUB_ADD:
+		rc_print_register(f, inst.SrcReg[1].File,
+				inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr);
+		fprintf(f, " + ");
+		rc_print_register(f, inst.SrcReg[0].File,
+				inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
+		break;
+	case RC_PRESUB_INV:
+		fprintf(f, "1 - ");
+		rc_print_register(f, inst.SrcReg[0].File,
+				inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
+		break;
+	default:
+		break;
+	}
+	fprintf(f, ")");
+}
+
+static void rc_print_src_register(FILE * f, struct rc_instruction * inst,
+						struct rc_src_register src)
+{
+	int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW);
+
+	if (src.Negate == RC_MASK_XYZW)
+		fprintf(f, "-");
+	if (src.Abs)
+		fprintf(f, "|");
+
+	if(src.File == RC_FILE_PRESUB)
+		rc_print_presub_instruction(f, inst->U.I.PreSub);
+	else
+		rc_print_register(f, src.File, src.Index, src.RelAddr);
+
+	if (src.Abs && !trivial_negate)
+		fprintf(f, "|");
+
+	if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) {
+		fprintf(f, ".");
+		rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate);
+	}
+
+	if (src.Abs && trivial_negate)
+		fprintf(f, "|");
+}
+
+static unsigned update_branch_depth(rc_opcode opcode, unsigned *branch_depth)
+{
+	switch (opcode) {
+	case RC_OPCODE_IF:
+	case RC_OPCODE_BGNLOOP:
+		return (*branch_depth)++ * 2;
+
+	case RC_OPCODE_ENDIF:
+	case RC_OPCODE_ENDLOOP:
+		assert(*branch_depth > 0);
+		return --(*branch_depth) * 2;
+
+	case RC_OPCODE_ELSE:
+		assert(*branch_depth > 0);
+		return (*branch_depth - 1) * 2;
+
+	default:
+		return *branch_depth * 2;
+	}
+}
+
+static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst, unsigned *branch_depth)
+{
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+	unsigned int reg;
+	unsigned spaces = update_branch_depth(inst->U.I.Opcode, branch_depth);
+
+	for (unsigned i = 0; i < spaces; i++)
+		fprintf(f, " ");
+
+	fprintf(f, "%s", opcode->Name);
+
+	switch(inst->U.I.SaturateMode) {
+	case RC_SATURATE_NONE: break;
+	case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break;
+	case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break;
+	default: fprintf(f, "_BAD_SAT"); break;
+	}
+
+	if (opcode->HasDstReg) {
+		fprintf(f, " ");
+		rc_print_dst_register(f, inst->U.I.DstReg);
+		if (opcode->NumSrcRegs)
+			fprintf(f, ",");
+	}
+
+	for(reg = 0; reg < opcode->NumSrcRegs; ++reg) {
+		if (reg > 0)
+			fprintf(f, ",");
+		fprintf(f, " ");
+		rc_print_src_register(f, inst, inst->U.I.SrcReg[reg]);
+	}
+
+	if (opcode->HasTexture) {
+		fprintf(f, ", %s%s[%u]",
+			textarget_to_string(inst->U.I.TexSrcTarget),
+			inst->U.I.TexShadow ? "SHADOW" : "",
+			inst->U.I.TexSrcUnit);
+	}
+
+	fprintf(f, ";");
+
+	if (inst->U.I.WriteALUResult) {
+		fprintf(f, " [aluresult = (");
+		rc_print_comparefunc(f,
+			(inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w",
+			inst->U.I.ALUResultCompare, "0");
+		fprintf(f, ")]");
+	}
+
+	fprintf(f, "\n");
+}
+
+static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst, unsigned *branch_depth)
+{
+	struct rc_pair_instruction * inst = &fullinst->U.P;
+	int printedsrc = 0;
+	unsigned spaces = update_branch_depth(inst->RGB.Opcode != RC_OPCODE_NOP ?
+					      inst->RGB.Opcode : inst->Alpha.Opcode, branch_depth);
+
+	for (unsigned i = 0; i < spaces; i++)
+		fprintf(f, " ");
+
+	for(unsigned int src = 0; src < 3; ++src) {
+		if (inst->RGB.Src[src].Used) {
+			if (printedsrc)
+				fprintf(f, ", ");
+			fprintf(f, "src%i.xyz = ", src);
+			rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0);
+			printedsrc = 1;
+		}
+		if (inst->Alpha.Src[src].Used) {
+			if (printedsrc)
+				fprintf(f, ", ");
+			fprintf(f, "src%i.w = ", src);
+			rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0);
+			printedsrc = 1;
+		}
+	}
+	if(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+		fprintf(f, ", srcp.xyz = %s",
+			presubtract_op_to_string(
+					inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index));
+	}
+	if(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
+		fprintf(f, ", srcp.w = %s",
+			presubtract_op_to_string(
+					inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index));
+	}
+	fprintf(f, "\n");
+
+	if (inst->RGB.Opcode != RC_OPCODE_NOP) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode);
+
+		for (unsigned i = 0; i < spaces; i++)
+			fprintf(f, " ");
+
+		fprintf(f, "     %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : "");
+		if (inst->RGB.WriteMask)
+			fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex,
+				(inst->RGB.WriteMask & 1) ? "x" : "",
+				(inst->RGB.WriteMask & 2) ? "y" : "",
+				(inst->RGB.WriteMask & 4) ? "z" : "");
+		if (inst->RGB.OutputWriteMask)
+			fprintf(f, " color[%i].%s%s%s", inst->RGB.Target,
+				(inst->RGB.OutputWriteMask & 1) ? "x" : "",
+				(inst->RGB.OutputWriteMask & 2) ? "y" : "",
+				(inst->RGB.OutputWriteMask & 4) ? "z" : "");
+		if (inst->WriteALUResult == RC_ALURESULT_X)
+			fprintf(f, " aluresult");
+
+		for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+			const char* abs = inst->RGB.Arg[arg].Abs ? "|" : "";
+			const char* neg = inst->RGB.Arg[arg].Negate ? "-" : "";
+			fprintf(f, ", %s%ssrc", neg, abs);
+			if(inst->RGB.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
+				fprintf(f,"p");
+			else
+				fprintf(f,"%d", inst->RGB.Arg[arg].Source);
+			fprintf(f,".%c%c%c%s",
+				rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)),
+				rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)),
+				rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)),
+				abs);
+		}
+		fprintf(f, "\n");
+	}
+
+	if (inst->Alpha.Opcode != RC_OPCODE_NOP) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode);
+
+		for (unsigned i = 0; i < spaces; i++)
+			fprintf(f, " ");
+
+		fprintf(f, "     %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : "");
+		if (inst->Alpha.WriteMask)
+			fprintf(f, " temp[%i].w", inst->Alpha.DestIndex);
+		if (inst->Alpha.OutputWriteMask)
+			fprintf(f, " color[%i].w", inst->Alpha.Target);
+		if (inst->Alpha.DepthWriteMask)
+			fprintf(f, " depth.w");
+		if (inst->WriteALUResult == RC_ALURESULT_W)
+			fprintf(f, " aluresult");
+
+		for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+			const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : "";
+			const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : "";
+			fprintf(f, ", %s%ssrc", neg, abs);
+			if(inst->Alpha.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
+				fprintf(f,"p");
+			else
+				fprintf(f,"%d", inst->Alpha.Arg[arg].Source);
+			fprintf(f,".%c%s",
+				rc_swizzle_char(GET_SWZ(inst->Alpha.Arg[arg].Swizzle, 0)), abs);
+		}
+		fprintf(f, "\n");
+	}
+
+	if (inst->WriteALUResult) {
+		for (unsigned i = 0; i < spaces; i++)
+			fprintf(f, " ");
+
+		fprintf(f, "      [aluresult = (");
+		rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0");
+		fprintf(f, ")]\n");
+	}
+}
+
+/**
+ * Print program to stderr, default options.
+ */
+void rc_print_program(const struct rc_program *prog)
+{
+	unsigned int linenum = 0;
+	unsigned branch_depth = 0;
+	struct rc_instruction *inst;
+
+	fprintf(stderr, "# Radeon Compiler Program\n");
+
+	for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) {
+		fprintf(stderr, "%3d: ", linenum);
+
+		if (inst->Type == RC_INSTRUCTION_PAIR)
+			rc_print_pair_instruction(stderr, inst, &branch_depth);
+		else
+			rc_print_normal_instruction(stderr, inst, &branch_depth);
+
+		linenum++;
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_tex.c b/src/gallium/drivers/r300/compiler/radeon_program_tex.c
new file mode 100644
index 00000000000..8d16b2cf9ec
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program_tex.c
@@ -0,0 +1,528 @@
+/*
+ * Copyright (C) 2010 Corbin Simpson
+ * Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_tex.h"
+
+#include "radeon_compiler_util.h"
+
+/* Series of transformations to be done on textures. */
+
+static struct rc_src_register shadow_fail_value(struct r300_fragment_program_compiler *compiler,
+						int tmu)
+{
+	struct rc_src_register reg = { 0, };
+
+	if (compiler->enable_shadow_ambient) {
+		reg.File = RC_FILE_CONSTANT;
+		reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants,
+						   RC_STATE_SHADOW_AMBIENT, tmu);
+		reg.Swizzle = RC_SWIZZLE_WWWW;
+	} else {
+		reg.File = RC_FILE_NONE;
+		reg.Swizzle = RC_SWIZZLE_0000;
+	}
+
+	reg.Swizzle = combine_swizzles(reg.Swizzle,
+				compiler->state.unit[tmu].texture_swizzle);
+	return reg;
+}
+
+static struct rc_src_register shadow_pass_value(struct r300_fragment_program_compiler *compiler,
+						int tmu)
+{
+	struct rc_src_register reg = { 0, };
+
+	reg.File = RC_FILE_NONE;
+	reg.Swizzle = combine_swizzles(RC_SWIZZLE_1111,
+				compiler->state.unit[tmu].texture_swizzle);
+	return reg;
+}
+
+static void scale_texcoords(struct r300_fragment_program_compiler *compiler,
+			    struct rc_instruction *inst,
+			    unsigned state_constant)
+{
+	struct rc_instruction *inst_mov;
+
+	unsigned temp = rc_find_free_temporary(&compiler->Base);
+
+	inst_mov = rc_insert_new_instruction(&compiler->Base, inst->Prev);
+
+	inst_mov->U.I.Opcode = RC_OPCODE_MUL;
+	inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	inst_mov->U.I.DstReg.Index = temp;
+	inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+	inst_mov->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+	inst_mov->U.I.SrcReg[1].Index =
+			rc_constants_add_state(&compiler->Base.Program.Constants,
+					       state_constant, inst->U.I.TexSrcUnit);
+
+	reset_srcreg(&inst->U.I.SrcReg[0]);
+	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+	inst->U.I.SrcReg[0].Index = temp;
+}
+
+static void projective_divide(struct r300_fragment_program_compiler *compiler,
+			      struct rc_instruction *inst)
+{
+	struct rc_instruction *inst_mul, *inst_rcp;
+
+	unsigned temp = rc_find_free_temporary(&compiler->Base);
+
+	inst_rcp = rc_insert_new_instruction(&compiler->Base, inst->Prev);
+	inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
+	inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	inst_rcp->U.I.DstReg.Index = temp;
+	inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+	inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+	/* Because the input can be arbitrarily swizzled,
+	 * read the component mapped to W. */
+	inst_rcp->U.I.SrcReg[0].Swizzle =
+		RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));
+
+	inst_mul = rc_insert_new_instruction(&compiler->Base, inst->Prev);
+	inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+	inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	inst_mul->U.I.DstReg.Index = temp;
+	inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+	inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+	inst_mul->U.I.SrcReg[1].Index = temp;
+	inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+
+	reset_srcreg(&inst->U.I.SrcReg[0]);
+	inst->U.I.Opcode = RC_OPCODE_TEX;
+	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+	inst->U.I.SrcReg[0].Index = temp;
+}
+
+/**
+ * Transform TEX, TXP, TXB, and KIL instructions in the following ways:
+ *  - implement texture compare (shadow extensions)
+ *  - extract non-native source / destination operands
+ *  - premultiply texture coordinates for RECT
+ *  - extract operand swizzles
+ *  - introduce a temporary register when write masks are needed
+ */
+int radeonTransformTEX(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void* data)
+{
+	struct r300_fragment_program_compiler *compiler =
+		(struct r300_fragment_program_compiler*)data;
+	rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;
+	int is_rect = inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
+		      compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords;
+
+	if (inst->U.I.Opcode != RC_OPCODE_TEX &&
+		inst->U.I.Opcode != RC_OPCODE_TXB &&
+		inst->U.I.Opcode != RC_OPCODE_TXP &&
+		inst->U.I.Opcode != RC_OPCODE_TXD &&
+		inst->U.I.Opcode != RC_OPCODE_TXL &&
+		inst->U.I.Opcode != RC_OPCODE_KIL)
+		return 0;
+
+	/* ARB_shadow & EXT_shadow_funcs */
+	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+		((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) ||
+		 (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) {
+		rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
+
+		if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+
+			if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
+				inst->U.I.SrcReg[0] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
+			} else {
+				inst->U.I.SrcReg[0] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);
+			}
+
+			return 1;
+		} else {
+			struct rc_instruction * inst_rcp = NULL;
+			struct rc_instruction *inst_mul, *inst_add, *inst_cmp;
+			unsigned tmp_texsample;
+			unsigned tmp_sum;
+			int pass, fail;
+
+			/* Save the output register. */
+			struct rc_dst_register output_reg = inst->U.I.DstReg;
+			unsigned saturate_mode = inst->U.I.SaturateMode;
+
+			/* Redirect TEX to a new temp. */
+			tmp_texsample = rc_find_free_temporary(c);
+			inst->U.I.SaturateMode = 0;
+			inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst->U.I.DstReg.Index = tmp_texsample;
+			inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+
+			tmp_sum = rc_find_free_temporary(c);
+
+			if (inst->U.I.Opcode == RC_OPCODE_TXP) {
+				/* Compute 1/W. */
+				inst_rcp = rc_insert_new_instruction(c, inst);
+				inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
+				inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+				inst_rcp->U.I.DstReg.Index = tmp_sum;
+				inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+				inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+				inst_rcp->U.I.SrcReg[0].Swizzle =
+					RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));
+			}
+
+			/* Divide Z by W (if it's TXP) and saturate. */
+			inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
+			inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV;
+			inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_mul->U.I.DstReg.Index = tmp_sum;
+			inst_mul->U.I.DstReg.WriteMask = RC_MASK_W;
+			inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
+			inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+			inst_mul->U.I.SrcReg[0].Swizzle =
+				RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2));
+			if (inst->U.I.Opcode == RC_OPCODE_TXP) {
+				inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+				inst_mul->U.I.SrcReg[1].Index = tmp_sum;
+				inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+			}
+
+			/* Add the depth texture value. */
+			inst_add = rc_insert_new_instruction(c, inst_mul);
+			inst_add->U.I.Opcode = RC_OPCODE_ADD;
+			inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_add->U.I.DstReg.Index = tmp_sum;
+			inst_add->U.I.DstReg.WriteMask = RC_MASK_W;
+			inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+			inst_add->U.I.SrcReg[0].Index = tmp_sum;
+			inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
+			inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+			inst_add->U.I.SrcReg[1].Index = tmp_texsample;
+			inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
+
+			/* Note that SrcReg[0] is r, SrcReg[1] is tex and:
+			 *   LESS:    r  < tex  <=>      -tex+r < 0
+			 *   GEQUAL:  r >= tex  <=> not (-tex+r < 0)
+			 *   GREATER: r  > tex  <=>       tex-r < 0
+			 *   LEQUAL:  r <= tex  <=> not ( tex-r < 0)
+			 *   EQUAL:   GEQUAL
+			 *   NOTEQUAL:LESS
+			 */
+
+			/* This negates either r or tex: */
+			if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL ||
+			    comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL)
+				inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW;
+			else
+				inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
+
+			/* This negates the whole expresion: */
+			if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER ||
+			    comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {
+				pass = 1;
+				fail = 2;
+			} else {
+				pass = 2;
+				fail = 1;
+			}
+
+			inst_cmp = rc_insert_new_instruction(c, inst_add);
+			inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
+			inst_cmp->U.I.SaturateMode = saturate_mode;
+			inst_cmp->U.I.DstReg = output_reg;
+			inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+			inst_cmp->U.I.SrcReg[0].Index = tmp_sum;
+			inst_cmp->U.I.SrcReg[0].Swizzle =
+					combine_swizzles(RC_SWIZZLE_WWWW,
+							 compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle);
+			inst_cmp->U.I.SrcReg[pass] = shadow_pass_value(compiler, inst->U.I.TexSrcUnit);
+			inst_cmp->U.I.SrcReg[fail] = shadow_fail_value(compiler, inst->U.I.TexSrcUnit);
+
+			assert(tmp_texsample != tmp_sum);
+		}
+	}
+
+	/* R300 cannot sample from rectangles and the wrap mode fallback needs
+	 * normalized coordinates anyway. */
+	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+	    is_rect && (!c->is_r500 || wrapmode != RC_WRAP_NONE)) {
+		scale_texcoords(compiler, inst, RC_STATE_R300_TEXRECT_FACTOR);
+		inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
+	}
+
+	/* Divide by W if needed. */
+	if (inst->U.I.Opcode == RC_OPCODE_TXP &&
+	    (wrapmode == RC_WRAP_REPEAT || wrapmode == RC_WRAP_MIRRORED_REPEAT ||
+	     compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch)) {
+		projective_divide(compiler, inst);
+	}
+
+	/* Texture wrap modes don't work on NPOT textures.
+	 *
+	 * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and
+	 * mirroring are not. If we need to repeat, we do:
+	 *
+	 * MUL temp, texcoord, <scaling factor constant>
+	 * FRC temp, temp ; Discard integer portion of coords
+	 *
+	 * This gives us coords in [0, 1].
+	 *
+	 * Mirroring is trickier. We're going to start out like repeat:
+	 *
+	 * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes
+	 * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2]
+	 *                            ; so scale to [0, 1]
+	 * FRC temp, temp ; Make the pattern repeat
+	 * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1]
+	 * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern.
+	 *				; The pattern is backwards, so reverse it (1-x).
+	 *
+	 * This gives us coords in [0, 1].
+	 *
+	 * ~ C & M. ;)
+	 */
+	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+	    wrapmode != RC_WRAP_NONE) {
+		struct rc_instruction *inst_mov;
+		unsigned temp = rc_find_free_temporary(c);
+
+		if (wrapmode == RC_WRAP_REPEAT) {
+			/* Both instructions will be paired up. */
+			struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev);
+
+			inst_frc->U.I.Opcode = RC_OPCODE_FRC;
+			inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_frc->U.I.DstReg.Index = temp;
+			inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+			inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+		} else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {
+			/*
+			 * Function:
+			 *   f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)
+			 *
+			 * Code:
+			 *   MUL temp, src0, 0.5
+			 *   FRC temp, temp
+			 *   MAD temp, temp, 2, -1
+			 *   ADD temp, 1, -abs(temp)
+			 */
+
+			struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;
+			unsigned two, two_swizzle;
+
+			inst_mul = rc_insert_new_instruction(c, inst->Prev);
+
+			inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+			inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_mul->U.I.DstReg.Index = temp;
+			inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+			inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+			inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;
+
+			inst_frc = rc_insert_new_instruction(c, inst->Prev);
+
+			inst_frc->U.I.Opcode = RC_OPCODE_FRC;
+			inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_frc->U.I.DstReg.Index = temp;
+			inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+			inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+			inst_frc->U.I.SrcReg[0].Index = temp;
+			inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+
+			two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
+			inst_mad = rc_insert_new_instruction(c, inst->Prev);
+
+			inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+			inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_mad->U.I.DstReg.Index = temp;
+			inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+			inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+			inst_mad->U.I.SrcReg[0].Index = temp;
+			inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+			inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+			inst_mad->U.I.SrcReg[1].Index = two;
+			inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;
+			inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;
+			inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;
+
+			inst_add = rc_insert_new_instruction(c, inst->Prev);
+
+			inst_add->U.I.Opcode = RC_OPCODE_ADD;
+			inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_add->U.I.DstReg.Index = temp;
+			inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+			inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
+			inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+			inst_add->U.I.SrcReg[1].Index = temp;
+			inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
+			inst_add->U.I.SrcReg[1].Abs = 1;
+			inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;
+		} else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {
+			/*
+			 * Mirrored clamp modes are bloody simple, we just use abs
+			 * to mirror [0, 1] into [-1, 0]. This works for
+			 * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER.
+			 */
+			struct rc_instruction *inst_mov;
+
+			inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+			inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+			inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_mov->U.I.DstReg.Index = temp;
+			inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+			inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+			inst_mov->U.I.SrcReg[0].Abs = 1;
+		}
+
+		/* Preserve W for TXP/TXB. */
+		inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		inst_mov->U.I.DstReg.Index = temp;
+		inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
+		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+		reset_srcreg(&inst->U.I.SrcReg[0]);
+		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+		inst->U.I.SrcReg[0].Index = temp;
+	}
+
+	/* NPOT -> POT conversion for 3D textures. */
+	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+	    compiler->state.unit[inst->U.I.TexSrcUnit].clamp_and_scale_before_fetch) {
+		struct rc_instruction *inst_mov;
+		unsigned temp = rc_find_free_temporary(c);
+
+		/* Saturate XYZ. */
+		inst_mov = rc_insert_new_instruction(c, inst->Prev);
+		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+		inst_mov->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
+		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		inst_mov->U.I.DstReg.Index = temp;
+		inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+		/* Copy W. */
+		inst_mov = rc_insert_new_instruction(c, inst->Prev);
+		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		inst_mov->U.I.DstReg.Index = temp;
+		inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
+		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+		reset_srcreg(&inst->U.I.SrcReg[0]);
+		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+		inst->U.I.SrcReg[0].Index = temp;
+
+		scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR);
+	}
+
+	/* Convert SNORM-encoded ATI1N sampled as UNORM to SNORM.
+	 * Formula: dst = tex > 0.5 ? tex*2-2 : tex*2
+	 */
+	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+	    compiler->state.unit[inst->U.I.TexSrcUnit].convert_unorm_to_snorm) {
+		unsigned two, two_swizzle;
+		struct rc_instruction *inst_mul, *inst_mad, *inst_cnd;
+
+		two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2.35, &two_swizzle);
+
+		inst_mul = rc_insert_new_instruction(c, inst);
+		inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+		inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c);
+		inst_mul->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+		inst_mul->U.I.SrcReg[0].Index = rc_find_free_temporary(c); /* redirected TEX output */
+		inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; /* 2 */
+		inst_mul->U.I.SrcReg[1].Index = two;
+		inst_mul->U.I.SrcReg[1].Swizzle = two_swizzle;
+
+		inst_mad = rc_insert_new_instruction(c, inst_mul);
+		inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+		inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c);
+		inst_mad->U.I.SrcReg[0] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */
+		inst_mad->U.I.SrcReg[1] = inst_mul->U.I.SrcReg[1]; /* 2 */
+		inst_mad->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[1]; /* 2 */
+		inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZW;
+
+		inst_cnd = rc_insert_new_instruction(c, inst_mad);
+		inst_cnd->U.I.Opcode = RC_OPCODE_CND;
+		inst_cnd->U.I.SaturateMode = inst->U.I.SaturateMode;
+		inst_cnd->U.I.DstReg = inst->U.I.DstReg;
+		inst_cnd->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+		inst_cnd->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index;
+		inst_cnd->U.I.SrcReg[0].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle;
+		inst_cnd->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+		inst_cnd->U.I.SrcReg[1].Index = inst_mul->U.I.DstReg.Index;
+		inst_cnd->U.I.SrcReg[1].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].texture_swizzle;
+		inst_cnd->U.I.SrcReg[2] = inst_mul->U.I.SrcReg[0]; /* redirected TEX output */
+
+		inst->U.I.SaturateMode = 0;
+		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		inst->U.I.DstReg.Index = inst_mul->U.I.SrcReg[0].Index;
+		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+	}
+
+	/* Cannot write texture to output registers or with saturate (all chips),
+	 * or with masks (non-r500). */
+	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+		(inst->U.I.DstReg.File != RC_FILE_TEMPORARY ||
+		 inst->U.I.SaturateMode ||
+		 (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {
+		struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
+
+		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+		inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode;
+		inst_mov->U.I.DstReg = inst->U.I.DstReg;
+		inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+		inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);
+
+		inst->U.I.SaturateMode = 0;
+		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
+		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+	}
+
+	/* Cannot read texture coordinate from constants file */
+	if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
+		struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
+		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+		reset_srcreg(&inst->U.I.SrcReg[0]);
+		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+		inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
+	}
+
+	return 1;
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_tex.h b/src/gallium/drivers/r300/compiler/radeon_program_tex.h
new file mode 100644
index 00000000000..a0105051ac4
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_program_tex.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2010 Corbin Simpson
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_TEX_H_
+#define __RADEON_PROGRAM_TEX_H_
+
+#include "radeon_compiler.h"
+#include "radeon_program.h"
+
+int radeonTransformTEX(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void* data);
+
+#endif /* __RADEON_PROGRAM_TEX_H_ */
diff --git a/src/gallium/drivers/r300/compiler/radeon_remove_constants.c b/src/gallium/drivers/r300/compiler/radeon_remove_constants.c
new file mode 100644
index 00000000000..7d76585a593
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_remove_constants.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_remove_constants.h"
+#include "radeon_dataflow.h"
+
+struct mark_used_data {
+	unsigned char * const_used;
+	unsigned * has_rel_addr;
+};
+
+static void remap_regs(void * userdata, struct rc_instruction * inst,
+			rc_register_file * pfile, unsigned int * pindex)
+{
+	unsigned *inv_remap_table = userdata;
+
+	if (*pfile == RC_FILE_CONSTANT) {
+		*pindex = inv_remap_table[*pindex];
+	}
+}
+
+static void mark_used(void * userdata, struct rc_instruction * inst,
+						struct rc_src_register * src)
+{
+	struct mark_used_data * d = userdata;
+
+	if (src->File == RC_FILE_CONSTANT) {
+		if (src->RelAddr) {
+			*d->has_rel_addr = 1;
+		} else {
+			d->const_used[src->Index] = 1;
+		}
+	}
+}
+
+void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
+{
+	unsigned **out_remap_table = (unsigned**)user;
+	unsigned char *const_used;
+	unsigned *remap_table;
+	unsigned *inv_remap_table;
+	unsigned has_rel_addr = 0;
+	unsigned is_identity = 1;
+	unsigned are_externals_remapped = 0;
+	struct rc_constant *constants = c->Program.Constants.Constants;
+	struct mark_used_data d;
+	unsigned new_count;
+
+	if (!c->Program.Constants.Count) {
+		*out_remap_table = NULL;
+		return;
+	}
+
+	const_used = malloc(c->Program.Constants.Count);
+	memset(const_used, 0, c->Program.Constants.Count);
+
+	d.const_used = const_used;
+	d.has_rel_addr = &has_rel_addr;
+
+	/* Pass 1: Mark used constants. */
+	for (struct rc_instruction *inst = c->Program.Instructions.Next;
+	     inst != &c->Program.Instructions; inst = inst->Next) {
+		rc_for_all_reads_src(inst, mark_used, &d);
+	}
+
+	/* Pass 2: If there is relative addressing or dead constant elimination
+	 * is disabled, mark all externals as used. */
+	if (has_rel_addr || !c->remove_unused_constants) {
+		for (unsigned i = 0; i < c->Program.Constants.Count; i++)
+			if (constants[i].Type == RC_CONSTANT_EXTERNAL)
+				const_used[i] = 1;
+	}
+
+	/* Pass 3: Make the remapping table and remap constants.
+	 * This pass removes unused constants simply by overwriting them by other constants. */
+	remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned));
+	inv_remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned));
+	new_count = 0;
+
+	for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
+		if (const_used[i]) {
+			remap_table[new_count] = i;
+			inv_remap_table[i] = new_count;
+
+			if (i != new_count) {
+				if (constants[i].Type == RC_CONSTANT_EXTERNAL)
+					are_externals_remapped = 1;
+
+				constants[new_count] = constants[i];
+				is_identity = 0;
+			}
+			new_count++;
+		}
+	}
+
+	/*  is_identity ==> new_count == old_count
+	 * !is_identity ==> new_count <  old_count */
+	assert( is_identity || new_count <  c->Program.Constants.Count);
+	assert(!((has_rel_addr || !c->remove_unused_constants) && are_externals_remapped));
+
+	/* Pass 4: Redirect reads of all constants to their new locations. */
+	if (!is_identity) {
+		for (struct rc_instruction *inst = c->Program.Instructions.Next;
+		     inst != &c->Program.Instructions; inst = inst->Next) {
+			rc_remap_registers(inst, remap_regs, inv_remap_table);
+		}
+	}
+
+	/* Set the new constant count. Note that new_count may be less than
+	 * Count even though the remapping function is identity. In that case,
+	 * the constants have been removed at the end of the array. */
+	c->Program.Constants.Count = new_count;
+
+	if (are_externals_remapped) {
+		*out_remap_table = remap_table;
+	} else {
+		*out_remap_table = NULL;
+		free(remap_table);
+	}
+
+	free(const_used);
+	free(inv_remap_table);
+
+	if (c->Debug & RC_DBG_LOG)
+		rc_constants_print(&c->Program.Constants);
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_remove_constants.h b/src/gallium/drivers/r300/compiler/radeon_remove_constants.h
new file mode 100644
index 00000000000..f29113b922b
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_remove_constants.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_REMOVE_CONSTANTS_H
+#define RADEON_REMOVE_CONSTANTS_H
+
+#include "radeon_compiler.h"
+
+void rc_remove_unused_constants(struct radeon_compiler *c, void *user);
+
+#endif
diff --git a/src/gallium/drivers/r300/compiler/radeon_rename_regs.c b/src/gallium/drivers/r300/compiler/radeon_rename_regs.c
new file mode 100644
index 00000000000..cafa0579734
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_rename_regs.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ */
+
+#include "radeon_rename_regs.h"
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+#include "radeon_program.h"
+
+/**
+ * This function renames registers in an attempt to get the code close to
+ * SSA form.  After this function has completed, most of the register are only
+ * written to one time, with a few exceptions.
+ *
+ * This function assumes all the instructions are still of type
+ * RC_INSTRUCTION_NORMAL.
+ */
+void rc_rename_regs(struct radeon_compiler *c, void *user)
+{
+	unsigned int i, used_length;
+	int new_index;
+	struct rc_instruction * inst;
+	struct rc_reader_data reader_data;
+	unsigned char * used;
+
+	/* XXX Remove this once the register allocation works with flow control. */
+	for(inst = c->Program.Instructions.Next;
+					inst != &c->Program.Instructions;
+					inst = inst->Next) {
+		if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP)
+			return;
+	}
+
+	used_length = 2 * rc_recompute_ips(c);
+	used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length);
+	memset(used, 0, sizeof(unsigned char) * used_length);
+
+	rc_get_used_temporaries(c, used, used_length);
+	for(inst = c->Program.Instructions.Next;
+					inst != &c->Program.Instructions;
+					inst = inst->Next) {
+
+		if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY)
+			continue;
+
+		reader_data.ExitOnAbort = 1;
+		rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL);
+
+		if (reader_data.Abort || reader_data.ReaderCount == 0)
+			continue;
+
+		new_index = rc_find_free_temporary_list(c, used, used_length,
+						RC_MASK_XYZW);
+		if (new_index < 0) {
+			rc_error(c, "Ran out of temporary registers\n");
+			return;
+		}
+
+		reader_data.Writer->U.I.DstReg.Index = new_index;
+		for(i = 0; i < reader_data.ReaderCount; i++) {
+			reader_data.Readers[i].U.I.Src->Index = new_index;
+		}
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_rename_regs.h b/src/gallium/drivers/r300/compiler/radeon_rename_regs.h
new file mode 100644
index 00000000000..3baf29f6120
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_rename_regs.h
@@ -0,0 +1,9 @@
+
+#ifndef RADEON_RENAME_REGS_H
+#define RADEON_RENAME_REGS_H
+
+struct radeon_compiler;
+
+void rc_rename_regs(struct radeon_compiler *c, void *user);
+
+#endif /* RADEON_RENAME_REGS_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_swizzle.h b/src/gallium/drivers/r300/compiler/radeon_swizzle.h
new file mode 100644
index 00000000000..c81d5f7a5e9
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_swizzle.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_SWIZZLE_H
+#define RADEON_SWIZZLE_H
+
+#include "radeon_program.h"
+
+struct rc_swizzle_split {
+	unsigned char NumPhases;
+	unsigned char Phase[4];
+};
+
+/**
+ * Describe the swizzling capability of target hardware.
+ */
+struct rc_swizzle_caps {
+	/**
+	 * Check whether the given swizzle, absolute and negate combination
+	 * can be implemented natively by the hardware for this opcode.
+	 *
+	 * \return 1 if the swizzle is native for the given opcode
+	 */
+	int (*IsNative)(rc_opcode opcode, struct rc_src_register reg);
+
+	/**
+	 * Determine how to split access to the masked channels of the
+	 * given source register to obtain ALU-native swizzles.
+	 */
+	void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split);
+};
+
+#endif /* RADEON_SWIZZLE_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_variable.c b/src/gallium/drivers/r300/compiler/radeon_variable.c
new file mode 100644
index 00000000000..938fb8421f2
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_variable.c
@@ -0,0 +1,517 @@
+/*
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_variable.h"
+
+#include "memory_pool.h"
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+#include "radeon_list.h"
+#include "radeon_opcodes.h"
+#include "radeon_program.h"
+
+/**
+ * Rewrite the index and writemask for the destination register of var
+ * and its friends to new_index and new_writemask.  This function also takes
+ * care of rewriting the swizzles for the sources of var.
+ */
+void rc_variable_change_dst(
+	struct rc_variable * var,
+	unsigned int new_index,
+	unsigned int new_writemask)
+{
+	struct rc_variable * var_ptr;
+	struct rc_list * readers;
+	unsigned int old_mask = rc_variable_writemask_sum(var);
+	unsigned int conversion_swizzle =
+			rc_make_conversion_swizzle(old_mask, new_writemask);
+
+	for (var_ptr = var; var_ptr; var_ptr = var_ptr->Friend) {
+		if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
+			rc_normal_rewrite_writemask(var_ptr->Inst,
+							conversion_swizzle);
+			var_ptr->Inst->U.I.DstReg.Index = new_index;
+		} else {
+			struct rc_pair_sub_instruction * sub;
+			if (var_ptr->Dst.WriteMask == RC_MASK_W) {
+				assert(new_writemask & RC_MASK_W);
+				sub = &var_ptr->Inst->U.P.Alpha;
+			} else {
+				sub = &var_ptr->Inst->U.P.RGB;
+				rc_pair_rewrite_writemask(sub,
+							conversion_swizzle);
+			}
+			sub->DestIndex = new_index;
+		}
+	}
+
+	readers = rc_variable_readers_union(var);
+
+	for ( ; readers; readers = readers->Next) {
+		struct rc_reader * reader = readers->Item;
+		if (reader->Inst->Type == RC_INSTRUCTION_NORMAL) {
+			reader->U.I.Src->Index = new_index;
+			reader->U.I.Src->Swizzle = rc_rewrite_swizzle(
+				reader->U.I.Src->Swizzle, conversion_swizzle);
+		} else {
+			struct rc_pair_instruction * pair_inst =
+							&reader->Inst->U.P;
+			unsigned int src_type = rc_source_type_swz(
+							reader->U.P.Arg->Swizzle);
+
+			int src_index = reader->U.P.Arg->Source;
+			if (src_index == RC_PAIR_PRESUB_SRC) {
+				src_index = rc_pair_get_src_index(
+						pair_inst, reader->U.P.Src);
+			}
+			/* Try to delete the old src, it is OK if this fails,
+			 * because rc_pair_alloc_source might be able to
+			 * find a source the ca be reused.
+			 */
+			if (rc_pair_remove_src(reader->Inst, src_type,
+							src_index, old_mask)) {
+				/* Reuse the source index of the source that
+				 * was just deleted and set its register
+				 * index.  We can't use rc_pair_alloc_source
+				 * for this becuase it might return a source
+				 * index that is already being used. */
+				if (src_type & RC_SOURCE_RGB) {
+					pair_inst->RGB.Src[src_index]
+						.Used =	1;
+					pair_inst->RGB.Src[src_index]
+						.Index = new_index;
+					pair_inst->RGB.Src[src_index]
+						.File = RC_FILE_TEMPORARY;
+				}
+				if (src_type & RC_SOURCE_ALPHA) {
+					pair_inst->Alpha.Src[src_index]
+						.Used = 1;
+					pair_inst->Alpha.Src[src_index]
+						.Index = new_index;
+					pair_inst->Alpha.Src[src_index]
+						.File = RC_FILE_TEMPORARY;
+				}
+			} else {
+				src_index = rc_pair_alloc_source(
+						&reader->Inst->U.P,
+						src_type & RC_SOURCE_RGB,
+						src_type & RC_SOURCE_ALPHA,
+						RC_FILE_TEMPORARY,
+						new_index);
+				if (src_index < 0) {
+					rc_error(var->C, "Rewrite of inst %u failed "
+						"Can't allocate source for "
+						"Inst %u src_type=%x "
+						"new_index=%u new_mask=%u\n",
+						var->Inst->IP, reader->Inst->IP, src_type, new_index, new_writemask);
+						continue;
+				}
+			}
+			reader->U.P.Arg->Swizzle = rc_rewrite_swizzle(
+				reader->U.P.Arg->Swizzle, conversion_swizzle);
+			if (reader->U.P.Arg->Source != RC_PAIR_PRESUB_SRC) {
+				reader->U.P.Arg->Source = src_index;
+			}
+		}
+	}
+}
+
+/**
+ * Compute the live intervals for var and its friends.
+ */
+void rc_variable_compute_live_intervals(struct rc_variable * var)
+{
+	while(var) {
+		unsigned int i;
+		unsigned int start = var->Inst->IP;
+
+		for (i = 0; i < var->ReaderCount; i++) {
+			unsigned int chan;
+			unsigned int chan_start = start;
+			unsigned int chan_end = var->Readers[i].Inst->IP;
+			unsigned int mask = var->Readers[i].WriteMask;
+			struct rc_instruction * inst;
+
+			/* Extend the live interval of T0 to the start of the
+			 * loop for sequences like:
+			 * BGNLOOP
+			 * read T0
+			 * ...
+			 * write T0
+			 * ENDLOOP
+			 */
+			if (var->Readers[i].Inst->IP < start) {
+				struct rc_instruction * bgnloop =
+					rc_match_endloop(var->Readers[i].Inst);
+				chan_start = bgnloop->IP;
+			}
+
+			/* Extend the live interval of T0 to the start of the
+			 * loop in case there is a BRK instruction in the loop
+			 * (we don't actually check for a BRK instruction we
+			 * assume there is one somewhere in the loop, which
+			 * there usually is) for sequences like:
+			 * BGNLOOP
+			 * ...
+			 * conditional BRK
+			 * ...
+			 * write T0
+			 * ENDLOOP
+			 * read T0
+			 ***************************************************
+			 * Extend the live interval of T0 to the end of the
+			 * loop for sequences like:
+			 * write T0
+			 * BGNLOOP
+			 * ...
+			 * read T0
+			 * ENDLOOP
+			 */
+			for (inst = var->Inst; inst != var->Readers[i].Inst;
+							inst = inst->Next) {
+				rc_opcode op = rc_get_flow_control_inst(inst);
+				if (op == RC_OPCODE_ENDLOOP) {
+					struct rc_instruction * bgnloop =
+						rc_match_endloop(inst);
+					if (bgnloop->IP < chan_start) {
+						chan_start = bgnloop->IP;
+					}
+				} else if (op == RC_OPCODE_BGNLOOP) {
+					struct rc_instruction * endloop =
+						rc_match_bgnloop(inst);
+					if (endloop->IP > chan_end) {
+						chan_end = endloop->IP;
+					}
+				}
+			}
+
+			for (chan = 0; chan < 4; chan++) {
+				if ((mask >> chan) & 0x1) {
+					if (!var->Live[chan].Used
+					|| chan_start < var->Live[chan].Start) {
+						var->Live[chan].Start =
+								chan_start;
+					}
+					if (!var->Live[chan].Used
+					|| chan_end > var->Live[chan].End) {
+						var->Live[chan].End = chan_end;
+					}
+					var->Live[chan].Used = 1;
+				}
+			}
+		}
+		var = var->Friend;
+	}
+}
+
+/**
+ * @return 1 if a and b share a reader
+ * @return 0 if they do not
+ */
+static unsigned int readers_intersect(
+	struct rc_variable * a,
+	struct rc_variable * b)
+{
+	unsigned int a_index, b_index;
+	for (a_index = 0; a_index < a->ReaderCount; a_index++) {
+		struct rc_reader reader_a = a->Readers[a_index];
+		for (b_index = 0; b_index < b->ReaderCount; b_index++) {
+			struct rc_reader reader_b = b->Readers[b_index];
+			if (reader_a.Inst->Type == RC_INSTRUCTION_NORMAL
+				&& reader_b.Inst->Type == RC_INSTRUCTION_NORMAL
+				&& reader_a.U.I.Src == reader_b.U.I.Src) {
+
+				return 1;
+			}
+			if (reader_a.Inst->Type == RC_INSTRUCTION_PAIR
+				&& reader_b.Inst->Type == RC_INSTRUCTION_PAIR
+				&& reader_a.U.P.Src == reader_b.U.P.Src) {
+
+				return 1;
+			}
+		}
+	}
+	return 0;
+}
+
+void rc_variable_add_friend(
+	struct rc_variable * var,
+	struct rc_variable * friend)
+{
+	assert(var->Dst.Index == friend->Dst.Index);
+	while(var->Friend) {
+		var = var->Friend;
+	}
+	var->Friend = friend;
+}
+
+struct rc_variable * rc_variable(
+	struct radeon_compiler * c,
+	unsigned int DstFile,
+	unsigned int DstIndex,
+	unsigned int DstWriteMask,
+	struct rc_reader_data * reader_data)
+{
+	struct rc_variable * new =
+			memory_pool_malloc(&c->Pool, sizeof(struct rc_variable));
+	memset(new, 0, sizeof(struct rc_variable));
+	new->C = c;
+	new->Dst.File = DstFile;
+	new->Dst.Index = DstIndex;
+	new->Dst.WriteMask = DstWriteMask;
+	if (reader_data) {
+		new->Inst = reader_data->Writer;
+		new->ReaderCount = reader_data->ReaderCount;
+		new->Readers = reader_data->Readers;
+	}
+	return new;
+}
+
+static void get_variable_helper(
+	struct rc_list ** variable_list,
+	struct rc_variable * variable)
+{
+	struct rc_list * list_ptr;
+	for (list_ptr = *variable_list; list_ptr; list_ptr = list_ptr->Next) {
+		if (readers_intersect(variable, list_ptr->Item)) {
+			rc_variable_add_friend(list_ptr->Item, variable);
+			return;
+		}
+	}
+	rc_list_add(variable_list, rc_list(&variable->C->Pool, variable));
+}
+
+static void get_variable_pair_helper(
+	struct rc_list ** variable_list,
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	struct rc_pair_sub_instruction * sub_inst)
+{
+	struct rc_reader_data reader_data;
+	struct rc_variable * new_var;
+	rc_register_file file;
+	unsigned int writemask;
+
+	if (sub_inst->Opcode == RC_OPCODE_NOP) {
+		return;
+	}
+	memset(&reader_data, 0, sizeof(struct rc_reader_data));
+	rc_get_readers_sub(c, inst, sub_inst, &reader_data, NULL, NULL, NULL);
+
+	if (reader_data.ReaderCount == 0) {
+		return;
+	}
+
+	if (sub_inst->WriteMask) {
+		file = RC_FILE_TEMPORARY;
+		writemask = sub_inst->WriteMask;
+	} else if (sub_inst->OutputWriteMask) {
+		file = RC_FILE_OUTPUT;
+		writemask = sub_inst->OutputWriteMask;
+	} else {
+		writemask = 0;
+		file = RC_FILE_NONE;
+	}
+	new_var = rc_variable(c, file, sub_inst->DestIndex, writemask,
+								&reader_data);
+	get_variable_helper(variable_list, new_var);
+}
+
+/**
+ * Generate a list of variables used by the shader program.  Each instruction
+ * that writes to a register is considered a variable.  The struct rc_variable
+ * data structure includes a list of readers and is essentially a
+ * definition-use chain.  Any two variables that share a reader are considered
+ * "friends" and they are linked together via the Friend attribute.
+ */
+struct rc_list * rc_get_variables(struct radeon_compiler * c)
+{
+	struct rc_instruction * inst;
+	struct rc_list * variable_list = NULL;
+
+	for (inst = c->Program.Instructions.Next;
+					inst != &c->Program.Instructions;
+					inst = inst->Next) {
+		struct rc_reader_data reader_data;
+		struct rc_variable * new_var;
+		memset(&reader_data, 0, sizeof(reader_data));
+
+		if (inst->Type == RC_INSTRUCTION_NORMAL) {
+			rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL);
+			if (reader_data.ReaderCount == 0) {
+				continue;
+			}
+			new_var = rc_variable(c, inst->U.I.DstReg.File,
+				inst->U.I.DstReg.Index,
+				inst->U.I.DstReg.WriteMask, &reader_data);
+			get_variable_helper(&variable_list, new_var);
+		} else {
+			get_variable_pair_helper(&variable_list, c, inst,
+							&inst->U.P.RGB);
+			get_variable_pair_helper(&variable_list, c, inst,
+							&inst->U.P.Alpha);
+		}
+	}
+
+	return variable_list;
+}
+
+/**
+ * @return The bitwise or of the writemasks of a variable and all of its
+ * friends.
+ */
+unsigned int rc_variable_writemask_sum(struct rc_variable * var)
+{
+	unsigned int writemask = 0;
+	while(var) {
+		writemask |= var->Dst.WriteMask;
+		var = var->Friend;
+	}
+	return writemask;
+}
+
+/*
+ * @return A list of readers for a variable and its friends.  Readers
+ * that read from two different variable friends are only included once in
+ * this list.
+ */
+struct rc_list * rc_variable_readers_union(struct rc_variable * var)
+{
+	struct rc_list * list = NULL;
+	while (var) {
+		unsigned int i;
+		for (i = 0; i < var->ReaderCount; i++) {
+			struct rc_list * temp;
+			struct rc_reader * a = &var->Readers[i];
+			unsigned int match = 0;
+			for (temp = list; temp; temp = temp->Next) {
+				struct rc_reader * b = temp->Item;
+				if (a->Inst->Type != b->Inst->Type) {
+					continue;
+				}
+				if (a->Inst->Type == RC_INSTRUCTION_NORMAL) {
+					if (a->U.I.Src == b->U.I.Src) {
+						match = 1;
+						break;
+					}
+				}
+				if (a->Inst->Type == RC_INSTRUCTION_PAIR) {
+					if (a->U.P.Arg == b->U.P.Arg
+					    && a->U.P.Src == b->U.P.Src) {
+						match = 1;
+						break;
+					}
+				}
+			}
+			if (match) {
+				continue;
+			}
+			rc_list_add(&list, rc_list(&var->C->Pool, a));
+		}
+		var = var->Friend;
+	}
+	return list;
+}
+
+static unsigned int reader_equals_src(
+	struct rc_reader reader,
+	unsigned int src_type,
+	void * src)
+{
+	if (reader.Inst->Type != src_type) {
+		return 0;
+	}
+	if (src_type == RC_INSTRUCTION_NORMAL) {
+		return reader.U.I.Src == src;
+	} else {
+		return reader.U.P.Src == src;
+	}
+}
+
+static unsigned int variable_writes_src(
+	struct rc_variable * var,
+	unsigned int src_type,
+	void * src)
+{
+	unsigned int i;
+	for (i = 0; i < var->ReaderCount; i++) {
+		if (reader_equals_src(var->Readers[i], src_type, src)) {
+			return 1;
+		}
+	}
+	return 0;
+}
+
+
+struct rc_list * rc_variable_list_get_writers(
+	struct rc_list * var_list,
+	unsigned int src_type,
+	void * src)
+{
+	struct rc_list * list_ptr;
+	struct rc_list * writer_list = NULL;
+	for (list_ptr = var_list; list_ptr; list_ptr = list_ptr->Next) {
+		struct rc_variable * var = list_ptr->Item;
+		if (variable_writes_src(var, src_type, src)) {
+			struct rc_variable * friend;
+			rc_list_add(&writer_list, rc_list(&var->C->Pool, var));
+			for (friend = var->Friend; friend;
+						friend = friend->Friend) {
+				if (variable_writes_src(friend, src_type, src)) {
+					rc_list_add(&writer_list,
+						rc_list(&var->C->Pool, friend));
+				}
+			}
+			/* Once we have indentifed the variable and its
+			 * friends that write this source, we can stop
+			 * stop searching, because we know know of the
+			 * other variables in the list will write this source.
+			 * If they did they would be friends of var.
+			 */
+			break;
+		}
+	}
+	return writer_list;
+}
+
+void rc_variable_print(struct rc_variable * var)
+{
+	unsigned int i;
+	while (var) {
+		fprintf(stderr, "%u: TEMP[%u].%u: ",
+			var->Inst->IP, var->Dst.Index, var->Dst.WriteMask);
+		for (i = 0; i < 4; i++) {
+			fprintf(stderr, "chan %u: start=%u end=%u ", i,
+					var->Live[i].Start, var->Live[i].End);
+		}
+		fprintf(stderr, "%u readers\n", var->ReaderCount);
+		if (var->Friend) {
+			fprintf(stderr, "Friend: \n\t");
+		}
+		var = var->Friend;
+	}
+}
diff --git a/src/gallium/drivers/r300/compiler/radeon_variable.h b/src/gallium/drivers/r300/compiler/radeon_variable.h
new file mode 100644
index 00000000000..9427bee18a7
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_variable.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_VARIABLE_H
+#define RADEON_VARIABLE_H
+
+#include "radeon_compiler.h"
+
+struct radeon_compiler;
+struct rc_list;
+struct rc_reader_data;
+struct rc_readers;
+
+struct live_intervals {
+	int Start;
+	int End;
+	int Used;
+};
+
+struct rc_variable {
+	struct radeon_compiler * C;
+	struct rc_dst_register Dst;
+
+	struct rc_instruction * Inst;
+	unsigned int ReaderCount;
+	struct rc_reader * Readers;
+	struct live_intervals Live[4];
+
+	/* A friend is a variable that shares a reader with another variable.
+	 */
+	struct rc_variable * Friend;
+};
+
+void rc_variable_change_dst(
+	struct rc_variable * var,
+	unsigned int new_index,
+	unsigned int new_writemask);
+
+void rc_variable_compute_live_intervals(struct rc_variable * var);
+
+void rc_variable_add_friend(
+	struct rc_variable * var,
+	struct rc_variable * friend);
+
+struct rc_variable * rc_variable(
+	struct radeon_compiler * c,
+	unsigned int DstFile,
+	unsigned int DstIndex,
+	unsigned int DstWriteMask,
+	struct rc_reader_data * reader_data);
+
+struct rc_list * rc_get_variables(struct radeon_compiler * c);
+
+unsigned int rc_variable_writemask_sum(struct rc_variable * var);
+
+struct rc_list * rc_variable_readers_union(struct rc_variable * var);
+
+struct rc_list * rc_variable_list_get_writers(
+	struct rc_list * var_list,
+	unsigned int src_type,
+	void * src);
+
+void rc_variable_print(struct rc_variable * var);
+
+#endif /* RADEON_VARIABLE_H */
diff --git a/src/gallium/drivers/r300/compiler/tests/.gitignore b/src/gallium/drivers/r300/compiler/tests/.gitignore
new file mode 100644
index 00000000000..85672fed777
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/tests/.gitignore
@@ -0,0 +1 @@
+radeon_compiler_util_tests
diff --git a/src/gallium/drivers/r300/compiler/tests/Makefile b/src/gallium/drivers/r300/compiler/tests/Makefile
new file mode 100644
index 00000000000..6eda34a2c00
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/tests/Makefile
@@ -0,0 +1,53 @@
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+CFLAGS += -Wall -Werror
+
+### Basic defines ###
+TESTS =	radeon_compiler_util_tests
+
+TEST_SOURCES := $(TESTS:=.c)
+
+SHARED_SOURCES =		\
+	rc_test_helpers.c	\
+	unit_test.c
+
+C_SOURCES = $(SHARED_SOURCES) $(TEST_SOURCES)
+
+INCLUDES = \
+	-I. \
+	-I..
+
+COMPILER_LIB = ../../libr300.a
+
+##### TARGETS #####
+
+default: depend run_tests
+
+depend: $(C_SOURCES)
+	rm -f depend
+	touch depend
+	$(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $^ 2> /dev/null
+
+# Remove .o and backup files
+clean:
+	rm -f $(TESTS) depend depend.bak
+
+$(TESTS): $(TESTS:=.o) $(SHARED_SOURCES:.c=.o) $(COMPILER_LIB)
+	$(APP_CC) -o $@ $^
+
+run_tests: $(TESTS)
+	@echo "RUNNING TESTS:"
+	@echo ""
+	$(foreach test, $^, @./$(test))
+
+.PHONY: $(COMPILER_LIB)
+$(COMPILER_LIB):
+	$(MAKE) -C ../..
+
+##### RULES #####
+.c.o:
+	$(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@
+
+
+sinclude depend
diff --git a/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c b/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c
new file mode 100644
index 00000000000..a2e3f2ab2e5
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/tests/radeon_compiler_util_tests.c
@@ -0,0 +1,76 @@
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "radeon_compiler_util.h"
+#include "radeon_program.h"
+
+#include "rc_test_helpers.h"
+#include "unit_test.h"
+
+static void test_rc_inst_can_use_presub(
+	struct test_result * result,
+	int expected,
+	const char * add_str,
+	const char * replace_str)
+{
+	struct rc_instruction add_inst, replace_inst;
+	int ret;
+
+	test_begin(result);
+	init_rc_normal_instruction(&add_inst, add_str);
+	init_rc_normal_instruction(&replace_inst, replace_str);
+
+	ret = rc_inst_can_use_presub(&replace_inst, RC_PRESUB_ADD, 0,
+			&replace_inst.U.I.SrcReg[0],
+			&add_inst.U.I.SrcReg[0], &add_inst.U.I.SrcReg[1]);
+
+	test_check(result, ret == expected);
+}
+
+static void test_runner_rc_inst_can_use_presub(struct test_result * result)
+{
+
+	/* This tests the case where the source being replace has the same
+	 * register file and register index as another source register in the
+	 * CMP instruction.  A previous version of this function was ignoring
+	 * all registers that shared the same file and index as the replacement
+	 * register when counting the number of source selects.
+	 *
+	 * https://bugs.freedesktop.org/show_bug.cgi?id=36527
+	 */
+	test_rc_inst_can_use_presub(result, 0,
+		"ADD temp[0].z, temp[6].__x_, const[1].__x_;",
+		"CMP temp[0].y, temp[0]._z__, const[0]._z__, temp[0]._y__;");
+
+
+	/* Testing a random case that should fail
+	 *
+	 * https://bugs.freedesktop.org/show_bug.cgi?id=36527
+	 */
+	test_rc_inst_can_use_presub(result, 0,
+		"ADD temp[3], temp[1], temp[2];",
+		"MAD temp[1], temp[0], const[0].xxxx, -temp[3];");
+
+	/* This tests the case where the arguments of the ADD
+	 * instruction share the same register file and index.  Normally, we
+	 * would need only one source select for these two arguments, but since
+	 * they will be part of a presubtract operation we need to use the two
+	 * source selects that the presubtract instruction expects
+	 * (src0 and src1).
+	 *
+	 * https://bugs.freedesktop.org/show_bug.cgi?id=36527
+	 */
+	test_rc_inst_can_use_presub(result, 0,
+		"ADD temp[3].x, temp[0].x___, temp[0].x___;",
+		"MAD temp[0].xyz, temp[2].xyz_, -temp[3].xxx_, input[5].xyz_;");
+}
+
+int main(int argc, char ** argv)
+{
+	struct test tests[] = {
+		{"rc_inst_can_use_presub()", test_runner_rc_inst_can_use_presub},
+		{NULL, NULL}
+	};
+	run_tests(tests);
+}
diff --git a/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c
new file mode 100644
index 00000000000..ca4738af54d
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c
@@ -0,0 +1,380 @@
+#include <errno.h>
+#include <regex.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "../radeon_compiler_util.h"
+#include "../radeon_opcodes.h"
+#include "../radeon_program.h"
+
+#include "rc_test_helpers.h"
+
+/* This file contains some helper functions for filling out the rc_instruction
+ * data structures.  These functions take a string as input based on the format
+ * output by rc_program_print().
+ */
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+#define REGEX_ERR_BUF_SIZE 50
+
+struct match_info {
+	const char * String;
+	int Length;
+};
+
+static int match_length(regmatch_t * matches, int index)
+{
+	return matches[index].rm_eo - matches[index].rm_so;
+}
+
+static int regex_helper(
+	const char * regex_str,
+	const char * search_str,
+	regmatch_t * matches,
+	int num_matches)
+{
+	char err_buf[REGEX_ERR_BUF_SIZE];
+	regex_t regex;
+	int err_code;
+	unsigned int i;
+
+	err_code = regcomp(&regex, regex_str, REG_EXTENDED);
+	if (err_code) {
+		regerror(err_code, &regex, err_buf, REGEX_ERR_BUF_SIZE);
+		fprintf(stderr, "Failed to compile regex: %s\n", err_buf);
+		return 0;
+	}
+
+	err_code = regexec(&regex, search_str, num_matches, matches, 0);
+	DBG("Search string: '%s'\n", search_str);
+	for (i = 0; i < num_matches; i++) {
+		DBG("Match %u start = %d end = %d\n", i,
+					matches[i].rm_so, matches[i].rm_eo);
+	}
+	if (err_code) {
+		regerror(err_code, &regex, err_buf, REGEX_ERR_BUF_SIZE);
+		fprintf(stderr, "Failed to match regex: %s\n", err_buf);
+		return 0;
+	}
+	return 1;
+}
+
+#define REGEX_SRC_MATCHES 6
+
+struct src_tokens {
+	struct match_info Negate;
+	struct match_info Abs;
+	struct match_info File;
+	struct match_info Index;
+	struct match_info Swizzle;
+};
+
+/**
+ * Initialize the source register at index src_index for the instruction based
+ * on src_str.
+ *
+ * NOTE: Warning in init_rc_normal_instruction() applies to this function as
+ * well.
+ *
+ * @param src_str A string that represents the source register.  The format for
+ * this string is the same that is output by rc_program_print.
+ * @return 1 On success, 0 on failure
+ */
+int init_rc_normal_src(
+	struct rc_instruction * inst,
+	unsigned int src_index,
+	const char * src_str)
+{
+	const char * regex_str = "(-*)(\\|*)([[:lower:]]*)\\[([[:digit:]])\\](\\.*[[:lower:]-]*)";
+	regmatch_t matches[REGEX_SRC_MATCHES];
+	struct src_tokens tokens;
+	struct rc_src_register * src_reg = &inst->U.I.SrcReg[src_index];
+	unsigned int i;
+
+	/* Execute the regex */
+	if (!regex_helper(regex_str, src_str, matches, REGEX_SRC_MATCHES)) {
+		fprintf(stderr, "Failed to execute regex for src register.\n");
+		return 0;
+	}
+
+	/* Create Tokens */
+	tokens.Negate.String = src_str + matches[1].rm_so;
+	tokens.Negate.Length = match_length(matches, 1);
+	tokens.Abs.String = src_str + matches[2].rm_so;
+	tokens.Abs.Length = match_length(matches, 2);
+	tokens.File.String = src_str + matches[3].rm_so;
+	tokens.File.Length = match_length(matches, 3);
+	tokens.Index.String = src_str + matches[4].rm_so;
+	tokens.Index.Length = match_length(matches, 4);
+	tokens.Swizzle.String = src_str + matches[5].rm_so;
+	tokens.Swizzle.Length = match_length(matches, 5);
+
+	/* Negate */
+	if (tokens.Negate.Length  > 0) {
+		src_reg->Negate = RC_MASK_XYZW;
+	}
+
+	/* Abs */
+	if (tokens.Abs.Length > 0) {
+		src_reg->Abs = 1;
+	}
+
+	/* File */
+	if (!strncmp(tokens.File.String, "temp", tokens.File.Length)) {
+		src_reg->File = RC_FILE_TEMPORARY;
+	} else if (!strncmp(tokens.File.String, "input", tokens.File.Length)) {
+		src_reg->File = RC_FILE_INPUT;
+	} else if (!strncmp(tokens.File.String, "const", tokens.File.Length)) {
+		src_reg->File = RC_FILE_CONSTANT;
+	} else if (!strncmp(tokens.File.String, "none", tokens.File.Length)) {
+		src_reg->File = RC_FILE_NONE;
+	}
+
+	/* Index */
+	errno = 0;
+	src_reg->Index = strtol(tokens.Index.String, NULL, 10);
+	if (errno > 0) {
+		fprintf(stderr, "Could not convert src register index.\n");
+		return 0;
+	}
+
+	/* Swizzle */
+	if (tokens.Swizzle.Length == 0) {
+		src_reg->Swizzle = RC_SWIZZLE_XYZW;
+	} else {
+		int str_index = 1;
+		src_reg->Swizzle = RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED);
+		if (tokens.Swizzle.String[0] != '.') {
+			fprintf(stderr, "First char of swizzle is not valid.\n");
+			return 0;
+		}
+		for (i = 0; i < 4; i++, str_index++) {
+			if (tokens.Swizzle.String[str_index] == '-') {
+				src_reg->Negate |= (1 << i);
+				str_index++;
+			}
+			switch(tokens.Swizzle.String[str_index]) {
+			case 'x':
+				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_X);
+				break;
+			case 'y':
+				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Y);
+				break;
+			case 'z':
+				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_Z);
+				break;
+			case 'w':
+				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_W);
+				break;
+			case '1':
+				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ONE);
+				break;
+			case '0':
+				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_ZERO);
+				break;
+			case 'H':
+				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_HALF);
+				break;
+			case '_':
+				SET_SWZ(src_reg->Swizzle, i, RC_SWIZZLE_UNUSED);
+				break;
+			default:
+				fprintf(stderr, "Unknown src register swizzle.\n");
+				return 0;
+			}
+		}
+	}
+	DBG("File=%u index=%u swizzle=%x negate=%u abs=%u\n",
+			src_reg->File, src_reg->Index, src_reg->Swizzle,
+			src_reg->Negate, src_reg->Abs);
+	return 1;
+}
+
+#define REGEX_DST_MATCHES 4
+
+struct dst_tokens {
+	struct match_info File;
+	struct match_info Index;
+	struct match_info WriteMask;
+};
+
+/**
+ * Initialize the destination for the instruction based on dst_str.
+ *
+ * NOTE: Warning in init_rc_normal_instruction() applies to this function as
+ * well.
+ *
+ * @param dst_str A string that represents the destination register.  The format
+ * for this string is the same that is output by rc_program_print.
+ * @return 1 On success, 0 on failure
+ */
+int init_rc_normal_dst(
+	struct rc_instruction * inst,
+	const char * dst_str)
+{
+	const char * regex_str = "([[:lower:]]*)\\[([[:digit:]]*)\\](\\.*[[:lower:]]*)";
+	regmatch_t matches[REGEX_DST_MATCHES];
+	struct dst_tokens tokens;
+	unsigned int i;
+
+	/* Execute the regex */
+	if (!regex_helper(regex_str, dst_str, matches, REGEX_DST_MATCHES)) {
+		fprintf(stderr, "Failed to execute regex for dst register.\n");
+		return 0;
+	}
+
+	/* Create Tokens */
+	tokens.File.String = dst_str + matches[1].rm_so;
+	tokens.File.Length = match_length(matches, 1);
+	tokens.Index.String = dst_str + matches[2].rm_so;
+	tokens.Index.Length = match_length(matches, 2);
+	tokens.WriteMask.String = dst_str + matches[3].rm_so;
+	tokens.WriteMask.Length = match_length(matches, 3);
+
+	/* File Type */
+	if (!strncmp(tokens.File.String, "temp", tokens.File.Length)) {
+		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	} else if (!strncmp(tokens.File.String, "output", tokens.File.Length)) {
+		inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+	} else {
+		fprintf(stderr, "Unknown dst register file type.\n");
+		return 0;
+	}
+
+	/* File Index */
+	errno = 0;
+	inst->U.I.DstReg.Index = strtol(tokens.Index.String, NULL, 10);
+
+	if (errno > 0) {
+		fprintf(stderr, "Could not convert dst register index\n");
+		return 0;
+	}
+
+	/* WriteMask */
+	if (tokens.WriteMask.Length == 0) {
+		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+	} else {
+		/* The first character should be '.' */
+		if (tokens.WriteMask.String[0] != '.') {
+			fprintf(stderr, "1st char of writemask is not valid.\n");
+			return 0;
+		}
+		for (i = 1; i < tokens.WriteMask.Length; i++) {
+			switch(tokens.WriteMask.String[i]) {
+			case 'x':
+				inst->U.I.DstReg.WriteMask |= RC_MASK_X;
+				break;
+			case 'y':
+				inst->U.I.DstReg.WriteMask |= RC_MASK_Y;
+				break;
+			case 'z':
+				inst->U.I.DstReg.WriteMask |= RC_MASK_Z;
+				break;
+			case 'w':
+				inst->U.I.DstReg.WriteMask |= RC_MASK_W;
+				break;
+			default:
+				fprintf(stderr, "Unknown swizzle in writemask.\n");
+				return 0;
+			}
+		}
+	}
+	DBG("Dst Reg File=%u Index=%d Writemask=%d\n",
+			inst->U.I.DstReg.File,
+			inst->U.I.DstReg.Index,
+			inst->U.I.DstReg.WriteMask);
+	return 1;
+}
+
+#define REGEX_INST_MATCHES 7
+
+struct inst_tokens {
+	struct match_info Opcode;
+	struct match_info Sat;
+	struct match_info Dst;
+	struct match_info Srcs[3];
+};
+
+/**
+ * Initialize a normal instruction based on inst_str.
+ *
+ * WARNING: This function might not be able to handle every kind of format that
+ * rc_program_print() can output.  If you are having problems with a
+ * particular string, you may need to add support for it to this functions.
+ *
+ * @param inst_str A string that represents the source register.  The format for
+ * this string is the same that is output by rc_program_print.
+ * @return 1 On success, 0 on failure
+ */
+int init_rc_normal_instruction(
+	struct rc_instruction * inst,
+	const char * inst_str)
+{
+	const char * regex_str = "([[:upper:]]+)(_SAT)* ([^,]*)[, ]*([^,]*)[, ]*([^,]*)[, ]*([^;]*)";
+	int i;
+	regmatch_t matches[REGEX_INST_MATCHES];
+	struct inst_tokens tokens;
+
+	/* Initialize inst */
+	memset(inst, 0, sizeof(struct rc_instruction));
+	inst->Type = RC_INSTRUCTION_NORMAL;
+
+	/* Execute the regex */
+	if (!regex_helper(regex_str, inst_str, matches, REGEX_INST_MATCHES)) {
+		return 0;
+	}
+	memset(&tokens, 0, sizeof(tokens));
+
+	/* Create Tokens */
+	tokens.Opcode.String = inst_str + matches[1].rm_so;
+	tokens.Opcode.Length = match_length(matches, 1);
+	if (matches[2].rm_so > -1) {
+		tokens.Sat.String = inst_str + matches[2].rm_so;
+		tokens.Sat.Length = match_length(matches, 2);
+	}
+
+
+	/* Fill out the rest of the instruction. */
+	for (i = 0; i < MAX_RC_OPCODE; i++) {
+		const struct rc_opcode_info * info = rc_get_opcode_info(i);
+		unsigned int first_src = 3;
+		unsigned int j;
+		if (strncmp(tokens.Opcode.String, info->Name, tokens.Opcode.Length)) {
+			continue;
+		}
+		inst->U.I.Opcode = info->Opcode;
+		if (info->HasDstReg) {
+			char * dst_str;
+			tokens.Dst.String = inst_str + matches[3].rm_so;
+			tokens.Dst.Length = match_length(matches, 3);
+			first_src++;
+
+			dst_str = malloc(sizeof(char) * (tokens.Dst.Length + 1));
+			strncpy(dst_str, tokens.Dst.String, tokens.Dst.Length);
+			dst_str[tokens.Dst.Length] = '\0';
+			init_rc_normal_dst(inst, dst_str);
+			free(dst_str);
+		}
+		for (j = 0; j < info->NumSrcRegs; j++) {
+			char * src_str;
+			tokens.Srcs[j].String =
+				inst_str + matches[first_src + j].rm_so;
+			tokens.Srcs[j].Length =
+				match_length(matches, first_src + j);
+
+			src_str = malloc(sizeof(char) *
+						(tokens.Srcs[j].Length + 1));
+			strncpy(src_str, tokens.Srcs[j].String,
+						tokens.Srcs[j].Length);
+			src_str[tokens.Srcs[j].Length] = '\0';
+			init_rc_normal_src(inst, j, src_str);
+		}
+		break;
+	}
+	return 1;
+}
diff --git a/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h
new file mode 100644
index 00000000000..1a6bf9699ba
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.h
@@ -0,0 +1,13 @@
+
+int init_rc_normal_src(
+	struct rc_instruction * inst,
+	unsigned int src_index,
+	const char * src_str);
+
+int init_rc_normal_dst(
+	struct rc_instruction * inst,
+	const char * dst_str);
+
+int init_rc_normal_instruction(
+	struct rc_instruction * inst,
+	const char * inst_str);
diff --git a/src/gallium/drivers/r300/compiler/tests/unit_test.c b/src/gallium/drivers/r300/compiler/tests/unit_test.c
new file mode 100644
index 00000000000..266f3365c58
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/tests/unit_test.c
@@ -0,0 +1,35 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "unit_test.h"
+
+void run_tests(struct test tests[])
+{
+	int i;
+	for (i = 0; tests[i].name; i++) {
+		printf("Test %s\n", tests[i].name);
+		memset(&tests[i].result, 0, sizeof(tests[i].result));
+		tests[i].test_func(&tests[i].result);
+		printf("Test %s (%d/%d) pass\n", tests[i].name,
+			tests[i].result.pass, tests[i].result.test_count);
+	}
+}
+
+void test_begin(struct test_result * result)
+{
+	result->test_count++;
+}
+
+void test_check(struct test_result * result, int cond)
+{
+	printf("Subtest %u -> ", result->test_count);
+	if (cond) {
+		result->pass++;
+		printf("Pass");
+	} else {
+		result->fail++;
+		printf("Fail");
+	}
+	printf("\n");
+}
diff --git a/src/gallium/drivers/r300/compiler/tests/unit_test.h b/src/gallium/drivers/r300/compiler/tests/unit_test.h
new file mode 100644
index 00000000000..441e8b655a5
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/tests/unit_test.h
@@ -0,0 +1,17 @@
+
+struct test_result {
+	unsigned int test_count;
+	unsigned int pass;
+	unsigned int fail;
+};
+
+struct test {
+	const char * name;
+	void (*test_func)(struct test_result * result);
+	struct test_result result;
+};
+
+void run_tests(struct test tests[]);
+
+void test_begin(struct test_result * result);
+void test_check(struct test_result * result, int cond);
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index 6c1c9d2fb13..234e043b071 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -24,7 +24,6 @@
 #define R300_EMIT_H
 
 #include "r300_context.h"
-#include "radeon_code.h"
 
 struct rX00_fragment_program_code;
 struct r300_vertex_program_code;
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index e3a1bc4a0f4..a9fd3ad40dd 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -38,8 +38,7 @@
 #include "r300_texture.h"
 #include "r300_tgsi_to_rc.h"
 
-#include "radeon_code.h"
-#include "radeon_compiler.h"
+#include "compiler/radeon_compiler.h"
 
 /* Convert info about FS input semantics to r300_shader_semantics. */
 void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h
index c86a90b85ae..45c9e8801c3 100644
--- a/src/gallium/drivers/r300/r300_fs.h
+++ b/src/gallium/drivers/r300/r300_fs.h
@@ -27,7 +27,7 @@
 
 #include "pipe/p_state.h"
 #include "tgsi/tgsi_scan.h"
-#include "radeon_code.h"
+#include "compiler/radeon_code.h"
 #include "r300_shader_semantics.h"
 
 struct r300_fragment_shader_code {
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index bb30b1ab0be..5edbb22a743 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -2078,7 +2078,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #       define R300_ALU_OUTC_D2A                (3 << 23)
 #       define R300_ALU_OUTC_MIN                (4 << 23)
 #       define R300_ALU_OUTC_MAX                (5 << 23)
-#       define R300_ALU_OUTC_CMPH               (7 << 23)
+#       define R300_ALU_OUTC_CND                (7 << 23)
 #       define R300_ALU_OUTC_CMP                (8 << 23)
 #       define R300_ALU_OUTC_FRC                (9 << 23)
 #       define R300_ALU_OUTC_REPL_ALPHA         (10 << 23)
@@ -2944,6 +2944,23 @@ enum {
 
 /*\}*/
 
+#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class)	\
+	 (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT)	\
+	 | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT)	\
+	 | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT)	\
+	 | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT)	\
+	 | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT)	/* X Y Z W */	\
+	 | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT))
+
+#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate)	\
+	(((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT)				\
+	 | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT)			\
+	 | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT)			\
+	 | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT)			\
+	 | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT)			\
+	 | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT)	/* X Y Z W */				\
+	 | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT))
+
 /* BEGIN: Packet 3 commands */
 
 /* A primitive emission dword. */
@@ -3249,6 +3266,8 @@ enum {
 #   define R500_INST_RGB_CLAMP				(1 << 19)
 #   define R500_INST_ALPHA_CLAMP			(1 << 20)
 #   define R500_INST_ALU_RESULT_SEL			(1 << 21)
+#   define R500_INST_ALU_RESULT_SEL_RED			(0 << 21)
+#   define R500_INST_ALU_RESULT_SEL_ALPHA		(1 << 21)
 #   define R500_INST_ALPHA_PRED_INV			(1 << 22)
 #   define R500_INST_ALU_RESULT_OP_EQ			(0 << 23)
 #   define R500_INST_ALU_RESULT_OP_LT			(1 << 23)
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 0561ab9bfa4..07a3f3caee7 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -22,8 +22,7 @@
 
 #include "r300_tgsi_to_rc.h"
 
-#include "radeon_compiler.h"
-#include "radeon_program.h"
+#include "compiler/radeon_compiler.h"
 
 #include "tgsi/tgsi_info.h"
 #include "tgsi/tgsi_parse.h"
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index b319890157f..a5e8fd680ff 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -32,7 +32,7 @@
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_ureg.h"
 
-#include "radeon_compiler.h"
+#include "compiler/radeon_compiler.h"
 
 /* Convert info about VS output semantics into r300_shader_semantics. */
 static void r300_shader_read_vs_outputs(
diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h
index 170de6c79db..a482ddce9c9 100644
--- a/src/gallium/drivers/r300/r300_vs.h
+++ b/src/gallium/drivers/r300/r300_vs.h
@@ -26,7 +26,7 @@
 
 #include "pipe/p_state.h"
 #include "tgsi/tgsi_scan.h"
-#include "radeon_code.h"
+#include "compiler/radeon_code.h"
 
 #include "r300_context.h"
 #include "r300_shader_semantics.h"

From d4d5e3a336f4c1f2208faad57a985f711b09d86d Mon Sep 17 00:00:00 2001
From: Tobias Droste <tdroste@gmx.de>
Date: Thu, 14 Jul 2011 22:32:58 +0200
Subject: [PATCH 098/600] egl/gallium: fix build without softpipe and llvmpipe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Tobias Droste <tdroste@gmx.de>
Acked-by: Jakob Bornecrantz <wallbraker@gmail.com>
Reviewed-by: Marek Olšák <maraeo@gmail.com>
---
 src/gallium/targets/egl-static/Makefile | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/gallium/targets/egl-static/Makefile b/src/gallium/targets/egl-static/Makefile
index 69e7eecdf0c..5b7b330a1cd 100644
--- a/src/gallium/targets/egl-static/Makefile
+++ b/src/gallium/targets/egl-static/Makefile
@@ -141,10 +141,18 @@ egl_LIBS += \
 	$(TOP)/src/gallium/drivers/svga/libsvga.a
 endif
 
-# swrast
+# softpipe
+ifneq ($(findstring softpipe,$(GALLIUM_DRIVERS_DIRS)),)
 egl_CPPFLAGS += -DGALLIUM_SOFTPIPE -DGALLIUM_RBUG -DGALLIUM_TRACE
 egl_LIBS += $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a
 egl_SYS += -lm
+endif
+
+# llvmpipe
+ifneq ($(findstring llvmpipe,$(GALLIUM_DRIVERS_DIRS)),)
+egl_CPPFLAGS += -DGALLIUM_LLVMPIPE
+egl_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a
+endif
 
 # sort to remove duplicates
 egl_CPPFLAGS := $(sort $(egl_CPPFLAGS))
@@ -158,8 +166,6 @@ st_GL_SYS := -lm -lpthread $(DLOPEN_LIBS)
 
 # LLVM
 ifeq ($(MESA_LLVM),1)
-egl_CPPFLAGS += -DGALLIUM_LLVMPIPE
-egl_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a
 egl_SYS += $(LLVM_LIBS)
 LDFLAGS += $(LLVM_LDFLAGS)
 

From 79dcfb266aa6ff14ff21c0b6dddef6060b450c32 Mon Sep 17 00:00:00 2001
From: Benjamin Franzke <benjaminfranzke@googlemail.com>
Date: Wed, 27 Jul 2011 10:04:51 +0200
Subject: [PATCH 099/600] wayland-drm: Add copyright notice to protocol

Fixes build since wayland 986703ac7365bc87a5501714adb9fc73157c62b7.
---
 .../wayland-drm/protocol/wayland-drm.xml      | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/src/egl/wayland/wayland-drm/protocol/wayland-drm.xml b/src/egl/wayland/wayland-drm/protocol/wayland-drm.xml
index 0331f124e80..cde943060ca 100644
--- a/src/egl/wayland/wayland-drm/protocol/wayland-drm.xml
+++ b/src/egl/wayland/wayland-drm/protocol/wayland-drm.xml
@@ -1,5 +1,32 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <protocol name="drm">
+
+  <copyright>
+    Copyright © 2008-2011 Kristian Høgsberg
+    Copyright © 2010-2011 Intel Corporation
+
+    Permission to use, copy, modify, distribute, and sell this
+    software and its documentation for any purpose is hereby granted
+    without fee, provided that\n the above copyright notice appear in
+    all copies and that both that copyright notice and this permission
+    notice appear in supporting documentation, and that the name of
+    the copyright holders not be used in advertising or publicity
+    pertaining to distribution of the software without specific,
+    written prior permission.  The copyright holders make no
+    representations about the suitability of this software for any
+    purpose.  It is provided "as is" without express or implied
+    warranty.
+
+    THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+    SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+    FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+    WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+    AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+    ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+    THIS SOFTWARE.
+  </copyright>
+
   <!-- drm support. This object is created by the server and published
        using the display's global event. -->
   <interface name="wl_drm" version="1">

From 58c04435b12a104b1996fac4d3a3d345f31bd4e7 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Wed, 27 Jul 2011 12:13:37 +0200
Subject: [PATCH 100/600] mesa: don't forget about sampleBuffers in framebuffer
 visual update

Otherwise multisample will never been enabled for multisample
renderbuffers.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/main/framebuffer.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c
index e27569a6fac..23fa1b2c11e 100644
--- a/src/mesa/main/framebuffer.c
+++ b/src/mesa/main/framebuffer.c
@@ -548,6 +548,7 @@ _mesa_update_framebuffer_visual(struct gl_context *ctx,
             fb->Visual.rgbBits = fb->Visual.redBits
                + fb->Visual.greenBits + fb->Visual.blueBits;
             fb->Visual.samples = rb->NumSamples;
+            fb->Visual.sampleBuffers = rb->NumSamples > 0 ? 1 : 0;
             if (_mesa_get_format_color_encoding(fmt) == GL_SRGB)
                 fb->Visual.sRGBCapable = ctx->Const.sRGBCapable;
             break;

From 5e1b7097f3d6fa60e563c8d629bbda1c34efb3c1 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 27 Jul 2011 11:35:31 -0700
Subject: [PATCH 101/600] glsl: Remove completed items from the TODO list

---
 src/glsl/TODO | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/src/glsl/TODO b/src/glsl/TODO
index a3762384ff2..72133995cea 100644
--- a/src/glsl/TODO
+++ b/src/glsl/TODO
@@ -11,28 +11,12 @@
 
 1.30 features:
 
-- Implement AST-to-HIR conversion of bit-shift operators.
-
-- Implement AST-to-HIR conversion of bit-wise {&,|,^,!} operators.
-
 - Implement AST-to-HIR conversion of switch-statements
   - switch
   - case
   - Update break to correcly handle mixed nexting of switch-statements
     and loops.
 
-- Handle currently unsupported constant expression types
-  - ir_unop_bit_not
-  - ir_binop_mod
-  - ir_binop_lshift
-  - ir_binop_rshift
-  - ir_binop_bit_and
-  - ir_binop_bit_xor
-  - ir_binop_bit_or
-
-- Implement support for 1.30 style shadow compares which only return a float
-  instead of a vec4.
-
 - Implement support for gl_ClipDistance.  This is non-trivial because
   gl_ClipDistance is exposed as a float[8], but all hardware actually
   implements it as vec4[2].
\ No newline at end of file

From f622c6d7a23c480f6a17e4b3f81731231180e019 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 27 Jul 2011 11:37:30 -0700
Subject: [PATCH 102/600] glsl: Add source location tracking to TODO list

---
 src/glsl/TODO | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/glsl/TODO b/src/glsl/TODO
index 72133995cea..c99d7e152d6 100644
--- a/src/glsl/TODO
+++ b/src/glsl/TODO
@@ -9,6 +9,11 @@
 - Implement support for ir_binop_dot in ir_algebraic.cpp.  Perform
   transformations such as "dot(v, vec3(0.0, 1.0, 0.0))" -> v.y.
 
+- Track source locations throughout the IR.  There are currently several
+  places where we cannot emit line numbers for errors (and currently emit 0:0)
+  because we've "lost" the line number information.  This is particularly
+  noticeable at link time.
+
 1.30 features:
 
 - Implement AST-to-HIR conversion of switch-statements

From c6f59fcd00101a2f93a5a97d679f3b160ef0126a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Tue, 26 Jul 2011 01:05:13 +0200
Subject: [PATCH 103/600] configure.ac: fix xlib-based softpipe build

Tested-by: Jon TURNEY <jon.turney@dronecode.org.uk>

NOTE: This is a candidate for the 7.11 branch.
---
 configure.ac | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/configure.ac b/configure.ac
index 5c832e64669..40924a966c5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1936,11 +1936,12 @@ if test "x$with_gallium_drivers" != x; then
             gallium_check_st "nouveau/drm" "dri-nouveau" "xorg-nouveau" "" "xvmc-nouveau"
             ;;
         xswrast)
+            GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS softpipe"
+            if test "x$MESA_LLVM" = x1; then
+                GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS llvmpipe"
+            fi
+
             if test "x$HAVE_ST_DRI" = xyes; then
-                GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS softpipe"
-                if test "x$MESA_LLVM" = x1; then
-                    GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS llvmpipe"
-                fi
                 GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS dri-swrast"
             fi
             if test "x$HAVE_ST_VDPAU" = xyes; then
@@ -1958,9 +1959,6 @@ if test "x$with_gallium_drivers" != x; then
                if test "x$HAVE_WINSYS_XLIB" != xyes; then
                   GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS sw/xlib"
                fi
-               if test "x$HAVE_ST_DRI" != xyes; then
-                GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS softpipe"
-               fi
             fi
             ;;
         *)

From 0aed27ee37860ba332df776425d89d97ca1168b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Tue, 26 Jul 2011 01:05:51 +0200
Subject: [PATCH 104/600] configure.ac: add DLOPEN_LIBS to xlib build

Otherwise xlib-based llvmpipe fails to link.

NOTE: This is a candidate for the 7.11 branch.
---
 configure.ac | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure.ac b/configure.ac
index 40924a966c5..1b1823a211f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -951,7 +951,7 @@ xyesyes)
         GL_PC_LIB_PRIV="$GL_LIB_DEPS"
         GL_PC_CFLAGS="$X11_INCLUDES"
     fi
-    GL_LIB_DEPS="$GL_LIB_DEPS $SELINUX_LIBS -lm -lpthread"
+    GL_LIB_DEPS="$GL_LIB_DEPS $SELINUX_LIBS -lm -lpthread $DLOPEN_LIBS"
     GL_PC_LIB_PRIV="$GL_PC_LIB_PRIV $SELINUX_LIBS -lm -lpthread"
 
     # if static, move the external libraries to the programs

From fe33c886a79f49378e5719909a51e794b7bb1c38 Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Wed, 27 Jul 2011 10:35:17 -0700
Subject: [PATCH 105/600] glsl: improve the accuracy of the radians() builtin
 function

The constant used in the radians() function didn't have enough
precision, causing a relative error of 1.676e-5, which is far worse
than the precision of 32-bit floats.  This patch reduces the relative
error to 1.14e-9, which is the best we can do in 32 bits.

Fixes piglit tests {fs,vs}-radians-{float,vec2,vec3,vec4}.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/builtins/ir/radians | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/glsl/builtins/ir/radians b/src/glsl/builtins/ir/radians
index 6a0f5d2e219..a419101cf16 100644
--- a/src/glsl/builtins/ir/radians
+++ b/src/glsl/builtins/ir/radians
@@ -2,20 +2,20 @@
    (signature float
      (parameters
        (declare (in) float arg0))
-     ((return (expression float * (var_ref arg0) (constant float (0.017453))))))
+     ((return (expression float * (var_ref arg0) (constant float (0.0174532925))))))
 
    (signature vec2
      (parameters
        (declare (in) vec2 arg0))
-     ((return (expression vec2 * (var_ref arg0) (constant float (0.017453))))))
+     ((return (expression vec2 * (var_ref arg0) (constant float (0.0174532925))))))
 
    (signature vec3
      (parameters
        (declare (in) vec3 arg0))
-     ((return (expression vec3 * (var_ref arg0) (constant float (0.017453))))))
+     ((return (expression vec3 * (var_ref arg0) (constant float (0.0174532925))))))
 
    (signature vec4
      (parameters
        (declare (in) vec4 arg0))
-     ((return (expression vec4 * (var_ref arg0) (constant float (0.017453))))))
+     ((return (expression vec4 * (var_ref arg0) (constant float (0.0174532925))))))
 ))

From 3e1fd13f605f16e8b48f3a9b71910a3c66eb84b5 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 25 Jul 2011 14:27:07 -0700
Subject: [PATCH 106/600] i965/gen4: Fix message parameter loading for 1D TXD
 sampling.

We were neglecting to load dvdx and dvdy.  v is not optional.

Fixes glslparsertests tex-grad-0[12345].frag on Broadwater/Crestline.
(We still need an execution test using sampler1D.)

NOTE: This is a candidate for the 7.11 branch.

Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 9632aae64b0..b82dfd5ead4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -622,6 +622,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
        * dPdx = dudx, dvdx, drdx
        * dPdy = dudy, dvdy, drdy
        *
+       * 1-arg: Does not exist.
+       *
        * 2-arg: dudx   dvdx   dudy   dvdy
        *        dPdx.x dPdx.y dPdy.x dPdy.y
        *        m4     m5     m6     m7
@@ -633,14 +635,14 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
 	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx);
 	 dPdx.reg_offset++;
-	 mlen++;
       }
+      mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2);
 
       for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) {
 	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy);
 	 dPdy.reg_offset++;
-	 mlen++;
       }
+      mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2);
    } else {
       /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
        * instructions.  We'll need to do SIMD16 here.

From 15c0bc5eefc89bec537e412c02965f201fb1c011 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 25 Jul 2011 17:06:13 -0700
Subject: [PATCH 107/600] i965: Check actual tile offsets in Gen4 miptree
 workaround.

The purpose of the (irb->draw_offset & 4095) != 0 check was to ensure
that we don't have XYy offsets into a tile, since Gen4 hardware doesn't
support that.  However, it's insufficient: there are cases where
draw_offset & 4095 is 0 but we still have a Y-offset.  This leads to an
assertion failure in brw_update_renderbuffer_surface with tile_y != 0.

Instead, simply call intel_renderbuffer_tile_offsets to compute the
actual X/Y offsets and check if either are non-zero.  This makes both
the workaround and the assertion check the same things.

Fixes piglit test fbo-generatemipmap-formats, and should also fix
bugs #34009 and #39487.

NOTE: This is a candidate for stable release branches.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=34009
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=39487
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Chad Versace <chad@chad-versace.us>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/intel/intel_fbo.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 35be3257ab3..7d6d9f271e6 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -647,6 +647,22 @@ intel_renderbuffer_tile_offsets(struct intel_renderbuffer *irb,
    }
 }
 
+#ifndef I915
+static bool
+need_tile_offset_workaround(struct brw_context *brw,
+			    struct intel_renderbuffer *irb)
+{
+   uint32_t tile_x, tile_y;
+
+   if (brw->has_surface_tile_offset)
+      return false;
+
+   intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y);
+
+   return tile_x != 0 || tile_y != 0;
+}
+#endif
+
 /**
  * Called by glFramebufferTexture[123]DEXT() (and other places) to
  * prepare for rendering into texture memory.  This might be called
@@ -700,8 +716,7 @@ intel_render_texture(struct gl_context * ctx,
    intel_image->used_as_render_target = GL_TRUE;
 
 #ifndef I915
-   if (!brw_context(ctx)->has_surface_tile_offset &&
-       (irb->draw_offset & 4095) != 0) {
+   if (need_tile_offset_workaround(brw_context(ctx), irb)) {
       /* Original gen4 hardware couldn't draw to a non-tile-aligned
        * destination in a miptree unless you actually setup your
        * renderbuffer as a miptree and used the fragile

From f73caddd3339d284556036d031ab30ce8057a510 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 25 Jul 2011 21:13:43 -0700
Subject: [PATCH 108/600] i965: Remove the now unused
 intel_renderbuffer::draw_offset field.

The previous commit removed the last use of this field.

Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/intel/intel_fbo.c | 1 -
 src/mesa/drivers/dri/intel/intel_fbo.h | 1 -
 2 files changed, 2 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 7d6d9f271e6..e48d6ef9cbd 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -606,7 +606,6 @@ intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb,
 				  zoffset,
 				  &dst_x, &dst_y);
 
-   irb->draw_offset = (dst_y * mt->region->pitch + dst_x) * mt->cpp;
    irb->draw_x = dst_x;
    irb->draw_y = dst_y;
 }
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h
index f7f99a4f00c..2487994fde5 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.h
+++ b/src/mesa/drivers/dri/intel/intel_fbo.h
@@ -58,7 +58,6 @@ struct intel_renderbuffer
 
    /** \} */
 
-   GLuint draw_offset; /**< Offset of drawing address within the region */
    GLuint draw_x, draw_y; /**< Offset of drawing within the region */
 };
 

From 95ee961f77119826382cfbc617334aed986b72e5 Mon Sep 17 00:00:00 2001
From: Vadim Girlin <vadimgirlin@gmail.com>
Date: Fri, 29 Jul 2011 00:33:31 +0400
Subject: [PATCH 109/600] r600g: fix vs export count

Fixes https://bugs.freedesktop.org/show_bug.cgi?id=39572

Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
---
 src/gallium/drivers/r600/evergreen_state.c | 2 +-
 src/gallium/drivers/r600/r600_state.c      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 4605c833dea..152c5cf13a0 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2319,7 +2319,7 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader
 
 	r600_pipe_state_add_reg(rstate,
 			R_0286C4_SPI_VS_OUT_CONFIG,
-			S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
+			S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 1),
 			0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate,
 			R_028860_SQ_PGM_RESOURCES_VS,
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 01406f2bad6..294c400caa1 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2086,7 +2086,7 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad
 
 	r600_pipe_state_add_reg(rstate,
 			R_0286C4_SPI_VS_OUT_CONFIG,
-			S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
+			S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 1),
 			0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate,
 			R_028868_SQ_PGM_RESOURCES_VS,

From 58d6aa82878fc901d4dadd39e308a5d88b064997 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Wed, 27 Jul 2011 15:49:39 -0600
Subject: [PATCH 110/600] st/mesa: fix comment language

---
 src/mesa/state_tracker/st_atom_texture.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c
index 800a9f1f0e0..3115a2511ce 100644
--- a/src/mesa/state_tracker/st_atom_texture.c
+++ b/src/mesa/state_tracker/st_atom_texture.c
@@ -221,9 +221,9 @@ update_single_texture(struct st_context *st, struct pipe_sampler_view **sampler_
 
       if ((samp->sRGBDecode == GL_SKIP_DECODE_EXT) &&
 	  (_mesa_get_format_color_encoding(texFormat) == GL_SRGB)) {
-	 /* don't do sRGB->RGB conversion.  Interpret the texture
-	  * texture data as linear values.
-	  */
+         /* Don't do sRGB->RGB conversion.  Interpret the texture data as
+          * linear values.
+          */
 	 const gl_format linearFormat =
 	    _mesa_get_srgb_format_linear(texFormat);
 	 firstImageFormat = st_mesa_format_to_pipe_format(linearFormat);

From 26684e0b1a857cc16a2c6f2b542e5ccf3da5acf5 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 28 Jul 2011 09:43:09 -0600
Subject: [PATCH 111/600] mesa: test against MESA_FORMAT_NONE in
 _mesa_GetTexLevelParameteriv()

---
 src/mesa/main/texparam.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index c4ec29533e2..3f771f08bc6 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -888,7 +888,7 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level,
    texObj = _mesa_select_tex_object(ctx, texUnit, target);
 
    img = _mesa_select_tex_image(ctx, texObj, target, level);
-   if (!img || !img->TexFormat) {
+   if (!img || img->TexFormat == MESA_FORMAT_NONE) {
       /* undefined texture image */
       if (pname == GL_TEXTURE_COMPONENTS)
          *params = 1;

From e4fdc95277bd323d8945e20635d3a1702a2e695d Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 28 Jul 2011 09:51:30 -0600
Subject: [PATCH 112/600] mesa: fix format selection for meta CopyTexSubImage()

When we do a glReadPixels into the temporary buffer, we don't want to
use GL_LUMINANCE, GL_LUMINANCE_ALPHA or GL_INTENSITY since they will
compute L=R+G+B which is not what we want.

This bug has existed all along but was only exposed by the elimination
of the driver hook for glCopyTexImage() in
5874890c26f434f54e9218b83fae4eb8175c24e9.

Fixes https://bugs.freedesktop.org/show_bug.cgi?id=39604
Tested-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/drivers/common/meta.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 26c89519679..f9b4755988b 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -2869,6 +2869,16 @@ copy_tex_sub_image(struct gl_context *ctx,
 
    /* Choose format/type for temporary image buffer */
    format = _mesa_get_format_base_format(texImage->TexFormat);
+   if (format == GL_LUMINANCE ||
+       format == GL_LUMINANCE_ALPHA ||
+       format == GL_INTENSITY) {
+      /* We don't want to use GL_LUMINANCE, GL_INTENSITY, etc. for the
+       * temp image buffer because glReadPixels will do L=R+G+B which is
+       * not what we want (should be L=R).
+       */
+      format = GL_RGBA;
+   }
+
    type = get_temp_image_type(ctx, format);
    bpp = _mesa_bytes_per_pixel(format, type);
    if (bpp <= 0) {

From f79e3518b4e39cd27f679c402e715154f63107f6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 13 Jul 2011 16:08:42 -0700
Subject: [PATCH 113/600] softpipe: When doing write_all_cbufs, don't stomp
 over the color.

We have to make it through this loop processing the color multiple
times, so we can't go overwriting it on our first color buffer.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/softpipe/sp_quad_blend.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c
index 82f9785e32a..c881194768a 100644
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -817,17 +817,25 @@ blend_fallback(struct quad_stage *qs,
                               quads[0]->input.y0);
       boolean has_dst_alpha
          = util_format_has_alpha(softpipe->framebuffer.cbufs[cbuf]->format);
-      uint q, i, j, qbuf;
-
-      qbuf = write_all ? 0 : cbuf;
+      uint q, i, j;
 
       for (q = 0; q < nr; q++) {
          struct quad_header *quad = quads[q];
          float (*quadColor)[4];
+         float temp_quad_color[QUAD_SIZE][4];
          const int itx = (quad->input.x0 & (TILE_SIZE-1));
          const int ity = (quad->input.y0 & (TILE_SIZE-1));
 
-         quadColor = quad->output.color[qbuf];
+         if (write_all) {
+            for (j = 0; j < QUAD_SIZE; j++) {
+               for (i = 0; i < 4; i++) {
+                  temp_quad_color[i][j] = quad->output.color[0][i][j];
+               }
+            }
+            quadColor = temp_quad_color;
+         } else {
+            quadColor = quad->output.color[cbuf];
+         }
 
          /* get/swizzle dest colors
           */

From 83f5d5e6aa58754f52c3579c27d810c497fe13a3 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 22 Jul 2011 18:42:21 -0700
Subject: [PATCH 114/600] Add dependency generation for Mesa and GLSL dricore
 objects.

Reviewed-By: Christopher James Halse Rogers
	     <christopher.halse.rogers@canonical.com>
---
 src/glsl/Makefile | 1 +
 src/mesa/Makefile | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/glsl/Makefile b/src/glsl/Makefile
index 005b51d724b..c20a6c9edd9 100644
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@@ -164,6 +164,7 @@ depend: $(ALL_SOURCES) Makefile
 	rm -f depend
 	touch depend
 	$(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(ALL_SOURCES) 2> /dev/null
+	$(MKDEP) $(MKDEP_OPTIONS) -a -p $(DRICORE_OBJ_DIR)/ $(INCLUDES) $(ALL_SOURCES) 2> /dev/null
 
 # Remove .o and backup files
 clean: clean-dricore
diff --git a/src/mesa/Makefile b/src/mesa/Makefile
index a903a260ac9..88f31b68695 100644
--- a/src/mesa/Makefile
+++ b/src/mesa/Makefile
@@ -12,11 +12,10 @@ DRICORE_OBJ_DIR := objs-dricore
 include sources.mak
 
 # adjust object dirs
+DRICORE_OBJECTS := $(addprefix $(DRICORE_OBJ_DIR)/, $(MESA_OBJECTS))
 MESA_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_OBJECTS))
 MESA_GALLIUM_OBJECTS := $(addprefix $(MESA_OBJ_DIR)/, $(MESA_GALLIUM_OBJECTS))
 
-DRICORE_OBJECTS := $(addprefix $(DRICORE_OBJ_DIR)/, $(MESA_OBJECTS))
-
 # define preprocessor flags
 MESA_CPPFLAGS := $(API_DEFINES) $(DEFINES)
 
@@ -124,6 +123,8 @@ depend: $(ALL_SOURCES)
 	@ touch depend
 	@$(MKDEP) $(MKDEP_OPTIONS) -p$(MESA_OBJ_DIR)/ $(MESA_CPPFLAGS) \
 		$(ALL_SOURCES) > /dev/null 2>/dev/null
+	@$(MKDEP) $(MKDEP_OPTIONS) -a -p$(DRICORE_OBJ_DIR)/ $(MESA_CPPFLAGS) \
+		$(ALL_SOURCES) > /dev/null 2>/dev/null
 
 ######################################################################
 # Installation rules

From a5ab46909e9475da0eb8c814efb8e1859a6e6ed3 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Thu, 28 Jul 2011 13:33:55 +0900
Subject: [PATCH 115/600] egl: make pixmaps and pbuffers EGL_BUFFER_PRESERVED

eglSwapBuffers is no-op to these surface types anyway.
---
 src/egl/main/eglsurface.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/egl/main/eglsurface.c b/src/egl/main/eglsurface.c
index c9cfb01388e..3564ecd01b0 100644
--- a/src/egl/main/eglsurface.c
+++ b/src/egl/main/eglsurface.c
@@ -269,11 +269,13 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type,
 {
    const char *func;
    EGLint renderBuffer = EGL_BACK_BUFFER;
+   EGLint swapBehavior = EGL_BUFFER_PRESERVED;
    EGLint err;
 
    switch (type) {
    case EGL_WINDOW_BIT:
       func = "eglCreateWindowSurface";
+      swapBehavior = EGL_BUFFER_DESTROYED;
       break;
    case EGL_PIXMAP_BIT:
       func = "eglCreatePixmapSurface";
@@ -315,7 +317,7 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type,
 
    surf->MipmapLevel = 0;
    surf->MultisampleResolve = EGL_MULTISAMPLE_RESOLVE_DEFAULT;
-   surf->SwapBehavior = EGL_BUFFER_DESTROYED;
+   surf->SwapBehavior = swapBehavior;
 
    surf->HorizontalResolution = EGL_UNKNOWN;
    surf->VerticalResolution = EGL_UNKNOWN;

From d6a9564854601bd01a1132f0a17fcab1d2a41481 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Thu, 28 Jul 2011 16:03:11 +0900
Subject: [PATCH 116/600] egl: EGL_MATCH_NATIVE_NATIVE_PIXMAP cannot be
 EGL_DONT_CARE

---
 src/egl/main/eglconfig.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/egl/main/eglconfig.c b/src/egl/main/eglconfig.c
index 483d9807cf0..e1d53da3cd5 100644
--- a/src/egl/main/eglconfig.c
+++ b/src/egl/main/eglconfig.c
@@ -529,8 +529,9 @@ _eglParseConfigAttribList(_EGLConfig *conf, _EGLDisplay *dpy,
    if (!_eglValidateConfig(conf, EGL_TRUE))
       return EGL_FALSE;
 
-   /* the spec says that EGL_LEVEL cannot be EGL_DONT_CARE */
-   if (conf->Level == EGL_DONT_CARE)
+   /* EGL_LEVEL and EGL_MATCH_NATIVE_PIXMAP cannot be EGL_DONT_CARE */
+   if (conf->Level == EGL_DONT_CARE ||
+       conf->MatchNativePixmap == EGL_DONT_CARE)
       return EGL_FALSE;
 
    /* ignore other attributes when EGL_CONFIG_ID is given */

From 96ca6a6262293ce4ed460edf0aadd0ddb1470e79 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Fri, 29 Jul 2011 09:58:18 +0900
Subject: [PATCH 117/600] targets/{egl,gbm}: omit unneeded libdrm_radeon

---
 src/gallium/targets/egl-static/Makefile | 2 --
 src/gallium/targets/gbm/Makefile        | 2 --
 2 files changed, 4 deletions(-)

diff --git a/src/gallium/targets/egl-static/Makefile b/src/gallium/targets/egl-static/Makefile
index 5b7b330a1cd..42d34b8eda1 100644
--- a/src/gallium/targets/egl-static/Makefile
+++ b/src/gallium/targets/egl-static/Makefile
@@ -121,7 +121,6 @@ egl_CPPFLAGS += -D_EGL_PIPE_R300=1
 egl_LIBS += \
 	$(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
 	$(TOP)/src/gallium/drivers/r300/libr300.a
-egl_SYS += -ldrm_radeon
 endif
 
 # r600
@@ -130,7 +129,6 @@ egl_CPPFLAGS += -D_EGL_PIPE_R600=1
 egl_LIBS += \
 	$(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
 	$(TOP)/src/gallium/drivers/r600/libr600.a
-egl_SYS += -ldrm_radeon
 endif
 
 # vmwgfx
diff --git a/src/gallium/targets/gbm/Makefile b/src/gallium/targets/gbm/Makefile
index 53104253d4f..3ad3eca1d13 100644
--- a/src/gallium/targets/gbm/Makefile
+++ b/src/gallium/targets/gbm/Makefile
@@ -79,13 +79,11 @@ nouveau_SYS = -ldrm_nouveau
 r300_LIBS = \
 	$(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
 	$(TOP)/src/gallium/drivers/r300/libr300.a
-r300_SYS = -ldrm_radeon
 
 # r600 pipe driver
 r600_LIBS = \
 	$(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
 	$(TOP)/src/gallium/drivers/r600/libr600.a
-r600_SYS = -ldrm_radeon
 
 # vmwgfx pipe driver
 vmwgfx_LIBS = \

From ef1854d09021b6601e59e39fcb71a88fb5e5efb2 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 13 Jul 2011 14:24:41 -0700
Subject: [PATCH 118/600] mesa: Fix ff fragment shader inputs calculation when
 enabling a VS.

The FF VS generation happens just after the FF FS generation in
state.c, so the ctx->VP._Current value is for the previous state
update's vertex shader, not the one that will be chosen as a result of
this state update.  The vertexShader and vertexProgram variables
should be accurately telling us whether there's going to be a
ctx->VP._Current (except on _MaintainTnlProgram drivers, where it's
always true).

The glsl-vs-statechange-1 test was created to test for this, but it
turns out that the bug is hidden by the fact that we call
_mesa_update_state() twice per draw call -- once from
_mesa_valid_to_render() and once from vbo_draw_arrays(), and the
second one was fixing up the first one.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/main/ff_fragment_shader.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp
index 0b53c28f7ae..dbfa6b57d4d 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -330,8 +330,7 @@ static GLbitfield get_fp_input_mask( struct gl_context *ctx )
       /* _NEW_RENDERMODE */
       fp_inputs = (FRAG_BIT_COL0 | FRAG_BIT_TEX0);
    }
-   else if (!(vertexProgram || vertexShader) ||
-            !ctx->VertexProgram._Current) {
+   else if (!(vertexProgram || vertexShader)) {
       /* Fixed function vertex logic */
       /* _NEW_ARRAY */
       GLbitfield varying_inputs = ctx->varying_vp_inputs;

From 4fdd289805d14d4f7a234f88cd375be1b3b96764 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 25 Jul 2011 18:50:43 -0700
Subject: [PATCH 119/600] i965/fs: Respect ARB_color_buffer_float clamping.

This was done in the old codegen path, but not the new one.  Caught by
piglit fbo tests after the conversion to GLSL ff_fragment_shader.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 21 ++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index b82dfd5ead4..4f599fb477e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1745,6 +1745,7 @@ void
 fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color)
 {
    int reg_width = c->dispatch_width / 8;
+   fs_inst *inst;
 
    if (c->dispatch_width == 8 || intel->gen == 6) {
       /* SIMD8 write looks like:
@@ -1763,8 +1764,10 @@ fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color)
        * m + 6: a0
        * m + 7: a1
        */
-      emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index * reg_width),
-	   color);
+      inst = emit(BRW_OPCODE_MOV,
+		  fs_reg(MRF, first_color_mrf + index * reg_width),
+		  color);
+      inst->saturate = c->key.clamp_fragment_color;
    } else {
       /* pre-gen6 SIMD16 single source DP write looks like:
        * m + 0: r0
@@ -1782,16 +1785,22 @@ fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color)
 	  * usual destination + 1 for the second half we get
 	  * destination + 4.
 	  */
-	 emit(BRW_OPCODE_MOV,
-	      fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), color);
+	 inst = emit(BRW_OPCODE_MOV,
+		     fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index),
+		     color);
+	 inst->saturate = c->key.clamp_fragment_color;
       } else {
 	 push_force_uncompressed();
-	 emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), color);
+	 inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index),
+		     color);
+	 inst->saturate = c->key.clamp_fragment_color;
 	 pop_force_uncompressed();
 
 	 push_force_sechalf();
 	 color.sechalf = true;
-	 emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), color);
+	 inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4),
+		     color);
+	 inst->saturate = c->key.clamp_fragment_color;
 	 pop_force_sechalf();
 	 color.sechalf = false;
       }

From 5c9e0ad5fddf216921703a0aa9c911a51226cdfd Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Fri, 29 Jul 2011 10:59:18 +0900
Subject: [PATCH 120/600] st/egl: create pbuffers with PIPE_BIND_SAMPLER_VIEW

So that eglBindTexImage works.
---
 src/gallium/state_trackers/egl/common/egl_g3d_st.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_st.c b/src/gallium/state_trackers/egl/common/egl_g3d_st.c
index 60c3e332ac9..b839f848d7b 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d_st.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d_st.c
@@ -126,7 +126,7 @@ pbuffer_reference_openvg_image(struct egl_g3d_surface *gsurf)
 }
 
 static void
-pbuffer_allocate_render_texture(struct egl_g3d_surface *gsurf)
+pbuffer_allocate_pbuffer_texture(struct egl_g3d_surface *gsurf)
 {
    struct egl_g3d_display *gdpy =
       egl_g3d_display(gsurf->base.Resource.Display);
@@ -141,7 +141,8 @@ pbuffer_allocate_render_texture(struct egl_g3d_surface *gsurf)
    templ.depth0 = 1;
    templ.array_size = 1;
    templ.format = gsurf->stvis.color_format;
-   templ.bind = PIPE_BIND_RENDER_TARGET;
+   /* for rendering and binding to texture */
+   templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
 
    ptex = screen->resource_create(screen, &templ);
    gsurf->render_texture = ptex;
@@ -166,7 +167,7 @@ egl_g3d_st_framebuffer_validate_pbuffer(struct st_framebuffer_iface *stfbi,
       if (!gsurf->render_texture) {
          switch (gsurf->client_buffer_type) {
          case EGL_NONE:
-            pbuffer_allocate_render_texture(gsurf);
+            pbuffer_allocate_pbuffer_texture(gsurf);
             break;
          case EGL_OPENVG_IMAGE:
             pbuffer_reference_openvg_image(gsurf);

From dc1c0ca22a1c7fcaef90b787290144d8e3d77c33 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 29 Jul 2011 11:29:53 -0400
Subject: [PATCH 121/600] r600g: fix up vs export handling

Certain attributes (position, psize, etc.) don't
count as params; they are handled separately by the hw.
However, the VS is required to export at least one param
and r600_shader_from_tgsi() takes care of adding a dummy
export if there is none.  Make sure the VS param export
count in the SPI properly accounts for this.

Note: This is a candidate for the 7.11 branch.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/r600/evergreen_state.c | 12 ++++++++++--
 src/gallium/drivers/r600/r600_shader.c     |  6 ++++++
 src/gallium/drivers/r600/r600_shader.h     |  1 +
 src/gallium/drivers/r600/r600_state.c      | 12 ++++++++++--
 4 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 152c5cf13a0..bc6039dd40c 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2298,7 +2298,7 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader
 	struct r600_pipe_state *rstate = &shader->rstate;
 	struct r600_shader *rshader = &shader->shader;
 	unsigned spi_vs_out_id[10];
-	unsigned i, tmp;
+	unsigned i, tmp, nparams;
 
 	/* clear previous register */
 	rstate->nregs = 0;
@@ -2317,9 +2317,17 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader
 					spi_vs_out_id[i], 0xFFFFFFFF, NULL);
 	}
 
+	/* Certain attributes (position, psize, etc.) don't count as params.
+	 * VS is required to export at least one param and r600_shader_from_tgsi()
+	 * takes care of adding a dummy export.
+	 */
+	nparams = rshader->noutput - rshader->npos;
+	if (nparams < 1)
+		nparams = 1;
+
 	r600_pipe_state_add_reg(rstate,
 			R_0286C4_SPI_VS_OUT_CONFIG,
-			S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 1),
+			S_0286C4_VS_EXPORT_COUNT(nparams - 1),
 			0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate,
 			R_028860_SQ_PGM_RESOURCES_VS,
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 494f9370597..fc56656f55d 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -332,6 +332,12 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
 		ctx->shader->output[i].sid = d->Semantic.Index;
 		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
 		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
+		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
+			/* these don't count as vertex param exports */
+			if ((ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION) ||
+			    (ctx->shader->output[i].name == TGSI_SEMANTIC_PSIZE))
+				ctx->shader->npos++;
+		}
 		break;
 	case TGSI_FILE_CONSTANT:
 	case TGSI_FILE_TEMPORARY:
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index 3ba84bd8907..600c3e2f540 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -40,6 +40,7 @@ struct r600_shader {
 	struct r600_bc		bc;
 	unsigned		ninput;
 	unsigned		noutput;
+	unsigned		npos;
 	unsigned		nlds;
 	struct r600_shader_io	input[32];
 	struct r600_shader_io	output[32];
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 294c400caa1..1350a1cf565 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2062,7 +2062,7 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad
 	struct r600_pipe_state *rstate = &shader->rstate;
 	struct r600_shader *rshader = &shader->shader;
 	unsigned spi_vs_out_id[10];
-	unsigned i, tmp;
+	unsigned i, tmp, nparams;
 
 	/* clear previous register */
 	rstate->nregs = 0;
@@ -2084,9 +2084,17 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad
 					spi_vs_out_id[i], 0xFFFFFFFF, NULL);
 	}
 
+	/* Certain attributes (position, psize, etc.) don't count as params.
+	 * VS is required to export at least one param and r600_shader_from_tgsi()
+	 * takes care of adding a dummy export.
+	 */
+	nparams = rshader->noutput - rshader->npos;
+	if (nparams < 1)
+		nparams = 1;
+
 	r600_pipe_state_add_reg(rstate,
 			R_0286C4_SPI_VS_OUT_CONFIG,
-			S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 1),
+			S_0286C4_VS_EXPORT_COUNT(nparams - 1),
 			0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate,
 			R_028868_SQ_PGM_RESOURCES_VS,

From 44ffb4ae207e48f78fae55925601b8708ed09c1d Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 29 Jul 2011 11:52:39 -0700
Subject: [PATCH 122/600] i965/fs: Stop using the exec_list iterator.

The old style has gone out of favor in the project, but I kept copy
and pasting from existing iterator code.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp          | 70 +++++++++----------
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp     |  4 +-
 .../drivers/dri/i965/brw_fs_reg_allocate.cpp  | 16 ++---
 .../dri/i965/brw_fs_schedule_instructions.cpp | 16 ++---
 .../dri/i965/brw_fs_vector_splitting.cpp      | 16 ++---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp  | 16 ++---
 6 files changed, 67 insertions(+), 71 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b5ea943387d..15475fbae2f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -621,8 +621,8 @@ fs_visitor::assign_curb_setup()
    }
 
    /* Map the offsets in the UNIFORM file to fixed HW regs. */
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       for (unsigned int i = 0; i < 3; i++) {
 	 if (inst->src[i].file == UNIFORM) {
@@ -684,8 +684,8 @@ fs_visitor::assign_urb_setup()
    /* Offset all the urb_setup[] index by the actual position of the
     * setup regs, now that the location of the constants has been chosen.
     */
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       if (inst->opcode == FS_OPCODE_LINTERP) {
 	 assert(inst->src[2].file == FIXED_HW_REG);
@@ -739,8 +739,8 @@ fs_visitor::split_virtual_grfs()
       split_grf[this->delta_x.reg] = false;
    }
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       /* Texturing produces 4 contiguous registers, so no splitting. */
       if (inst->is_tex()) {
@@ -763,8 +763,8 @@ fs_visitor::split_virtual_grfs()
       }
    }
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       if (inst->dst.file == GRF &&
 	  split_grf[inst->dst.reg] &&
@@ -815,8 +815,8 @@ fs_visitor::setup_pull_constants()
    int pull_uniform_base = max_uniform_components;
    int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base;
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       for (int i = 0; i < 3; i++) {
 	 if (inst->src[i].file != UNIFORM)
@@ -871,8 +871,8 @@ fs_visitor::calculate_live_intervals()
    }
 
    int ip = 0;
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       if (inst->opcode == BRW_OPCODE_DO) {
 	 if (loop_depth++ == 0)
@@ -945,8 +945,8 @@ fs_visitor::propagate_constants()
 
    calculate_live_intervals();
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       if (inst->opcode != BRW_OPCODE_MOV ||
 	  inst->predicated ||
@@ -965,11 +965,9 @@ fs_visitor::propagate_constants()
       /* Found a move of a constant to a GRF.  Find anything else using the GRF
        * before it's written, and replace it with the constant if we can.
        */
-      exec_list_iterator scan_iter = iter;
-      scan_iter.next();
-      for (; scan_iter.has_next(); scan_iter.next()) {
-	 fs_inst *scan_inst = (fs_inst *)scan_iter.get();
-
+      for (fs_inst *scan_inst = (fs_inst *)inst->next;
+	   !scan_inst->is_tail_sentinel();
+	   scan_inst = (fs_inst *)scan_inst->next) {
 	 if (scan_inst->opcode == BRW_OPCODE_DO ||
 	     scan_inst->opcode == BRW_OPCODE_WHILE ||
 	     scan_inst->opcode == BRW_OPCODE_ELSE ||
@@ -1077,8 +1075,8 @@ fs_visitor::dead_code_eliminate()
 
    calculate_live_intervals();
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list_safe(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) {
 	 inst->remove();
@@ -1101,8 +1099,8 @@ fs_visitor::register_coalesce()
    int if_depth = 0;
    int loop_depth = 0;
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list_safe(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       /* Make sure that we dominate the instructions we're going to
        * scan for interfering with our coalescing, or we won't have
@@ -1141,11 +1139,10 @@ fs_visitor::register_coalesce()
        * program.
        */
       bool interfered = false;
-      exec_list_iterator scan_iter = iter;
-      scan_iter.next();
-      for (; scan_iter.has_next(); scan_iter.next()) {
-	 fs_inst *scan_inst = (fs_inst *)scan_iter.get();
 
+      for (fs_inst *scan_inst = (fs_inst *)inst->next;
+	   !scan_inst->is_tail_sentinel();
+	   scan_inst = (fs_inst *)scan_inst->next) {
 	 if (scan_inst->dst.file == GRF) {
 	    if (scan_inst->dst.reg == inst->dst.reg &&
 		(scan_inst->dst.reg_offset == inst->dst.reg_offset ||
@@ -1176,10 +1173,9 @@ fs_visitor::register_coalesce()
       /* Rewrite the later usage to point at the source of the move to
        * be removed.
        */
-      for (exec_list_iterator scan_iter = iter; scan_iter.has_next();
-	   scan_iter.next()) {
-	 fs_inst *scan_inst = (fs_inst *)scan_iter.get();
-
+      for (fs_inst *scan_inst = inst;
+	   !scan_inst->is_tail_sentinel();
+	   scan_inst = (fs_inst *)scan_inst->next) {
 	 for (int i = 0; i < 3; i++) {
 	    if (scan_inst->src[i].file == GRF &&
 		scan_inst->src[i].reg == inst->dst.reg &&
@@ -1212,8 +1208,8 @@ fs_visitor::compute_to_mrf()
 
    calculate_live_intervals();
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list_safe(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       int ip = next_ip;
       next_ip++;
@@ -1392,8 +1388,8 @@ fs_visitor::remove_duplicate_mrf_writes()
 
    memset(last_mrf_move, 0, sizeof(last_mrf_move));
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list_safe(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       switch (inst->opcode) {
       case BRW_OPCODE_DO:
@@ -1527,8 +1523,8 @@ fs_visitor::run()
       /* Generate FS IR for main().  (the visitor only descends into
        * functions called "main").
        */
-      foreach_iter(exec_list_iterator, iter, *shader->ir) {
-	 ir_instruction *ir = (ir_instruction *)iter.get();
+      foreach_list(node, &*shader->ir) {
+	 ir_instruction *ir = (ir_instruction *)node;
 	 base_ir = ir;
 	 this->result = reg_undef;
 	 ir->accept(this);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index eecfc92eb5b..9fb0153d1f8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -609,8 +609,8 @@ fs_visitor::generate_code()
 	     prog->Name, c->dispatch_width);
    }
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
       struct brw_reg src[3], dst;
 
       if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index b4689d2c293..78daa491156 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -75,8 +75,8 @@ fs_visitor::assign_regs_trivial()
    last_grf = hw_reg_mapping[i - 1] + (this->virtual_grf_sizes[i - 1] *
 				       reg_width);
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       assign_reg(hw_reg_mapping, &inst->dst, reg_width);
       assign_reg(hw_reg_mapping, &inst->src[0], reg_width);
@@ -283,8 +283,8 @@ fs_visitor::assign_regs()
 			    reg_width);
    }
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       assign_reg(hw_reg_mapping, &inst->dst, reg_width);
       assign_reg(hw_reg_mapping, &inst->src[0], reg_width);
@@ -336,8 +336,8 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
     * spill/unspill we'll have to do, and guess that the insides of
     * loops run 10 times.
     */
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       for (unsigned int i = 0; i < 3; i++) {
 	 if (inst->src[i].file == GRF) {
@@ -394,8 +394,8 @@ fs_visitor::spill_reg(int spill_reg)
     * virtual grf of the same size.  For most instructions, though, we
     * could just spill/unspill the GRF being accessed.
     */
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      fs_inst *inst = (fs_inst *)iter.get();
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
 
       for (unsigned int i = 0; i < 3; i++) {
 	 if (inst->src[i].file == GRF &&
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index d8218c26edb..9ec3f502764 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -283,8 +283,8 @@ instruction_scheduler::calculate_deps()
    memset(last_mrf_write, 0, sizeof(last_mrf_write));
 
    /* top-to-bottom dependencies: RAW and WAW. */
-   foreach_iter(exec_list_iterator, iter, instructions) {
-      schedule_node *n = (schedule_node *)iter.get();
+   foreach_list(node, &instructions) {
+      schedule_node *n = (schedule_node *)node;
       fs_inst *inst = n->inst;
 
       /* read-after-write deps. */
@@ -437,8 +437,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
    int time = 0;
 
    /* Remove non-DAG heads from the list. */
-   foreach_iter(exec_list_iterator, iter, instructions) {
-      schedule_node *n = (schedule_node *)iter.get();
+   foreach_list_safe(node, &instructions) {
+      schedule_node *n = (schedule_node *)node;
       if (n->parent_count != 0)
 	 n->remove();
    }
@@ -447,8 +447,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
       schedule_node *chosen = NULL;
       int chosen_time = 0;
 
-      foreach_iter(exec_list_iterator, iter, instructions) {
-	 schedule_node *n = (schedule_node *)iter.get();
+      foreach_list(node, &instructions) {
+	 schedule_node *n = (schedule_node *)node;
 
 	 if (!chosen || n->unblocked_time < chosen_time) {
 	    chosen = n;
@@ -490,8 +490,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
        * progress until the first is done.
        */
       if (chosen->inst->is_math()) {
-	 foreach_iter(exec_list_iterator, iter, instructions) {
-	    schedule_node *n = (schedule_node *)iter.get();
+	 foreach_list(node, &instructions) {
+	    schedule_node *n = (schedule_node *)node;
 
 	    if (n->inst->is_math())
 	       n->unblocked_time = MAX2(n->unblocked_time,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
index 530ffa26580..a9a60c2fd8a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
@@ -122,8 +122,8 @@ ir_vector_reference_visitor::get_variable_entry(ir_variable *var)
       break;
    }
 
-   foreach_iter(exec_list_iterator, iter, this->variable_list) {
-      variable_entry *entry = (variable_entry *)iter.get();
+   foreach_list(node, &this->variable_list) {
+      variable_entry *entry = (variable_entry *)node;
       if (entry->var == var)
 	 return entry;
    }
@@ -222,8 +222,8 @@ ir_vector_splitting_visitor::get_splitting_entry(ir_variable *var)
    if (!var->type->is_vector())
       return NULL;
 
-   foreach_iter(exec_list_iterator, iter, *this->variable_list) {
-      variable_entry *entry = (variable_entry *)iter.get();
+   foreach_list(node, &*this->variable_list) {
+      variable_entry *entry = (variable_entry *)node;
       if (entry->var == var) {
 	 return entry;
       }
@@ -341,8 +341,8 @@ brw_do_vector_splitting(exec_list *instructions)
    visit_list_elements(&refs, instructions);
 
    /* Trim out variables we can't split. */
-   foreach_iter(exec_list_iterator, iter, refs.variable_list) {
-      variable_entry *entry = (variable_entry *)iter.get();
+   foreach_list_safe(node, &refs.variable_list) {
+      variable_entry *entry = (variable_entry *)node;
 
       if (debug) {
 	 printf("vector %s@%p: decl %d, whole_access %d\n",
@@ -363,8 +363,8 @@ brw_do_vector_splitting(exec_list *instructions)
    /* Replace the decls of the vectors to be split with their split
     * components.
     */
-   foreach_iter(exec_list_iterator, iter, refs.variable_list) {
-      variable_entry *entry = (variable_entry *)iter.get();
+   foreach_list(node, &refs.variable_list) {
+      variable_entry *entry = (variable_entry *)node;
       const struct glsl_type *type;
       type = glsl_type::get_instance(entry->var->type->base_type, 1, 1);
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 4f599fb477e..2b769ccbba1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1477,8 +1477,8 @@ fs_visitor::visit(ir_if *ir)
       inst->predicated = true;
    }
 
-   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
-      ir_instruction *ir = (ir_instruction *)iter.get();
+   foreach_list(node, &ir->then_instructions) {
+      ir_instruction *ir = (ir_instruction *)node;
       this->base_ir = ir;
       this->result = reg_undef;
       ir->accept(this);
@@ -1487,8 +1487,8 @@ fs_visitor::visit(ir_if *ir)
    if (!ir->else_instructions.is_empty()) {
       emit(BRW_OPCODE_ELSE);
 
-      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
-	 ir_instruction *ir = (ir_instruction *)iter.get();
+      foreach_list(node, &ir->else_instructions) {
+	 ir_instruction *ir = (ir_instruction *)node;
 	 this->base_ir = ir;
 	 this->result = reg_undef;
 	 ir->accept(this);
@@ -1538,8 +1538,8 @@ fs_visitor::visit(ir_loop *ir)
       inst->predicated = true;
    }
 
-   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
-      ir_instruction *ir = (ir_instruction *)iter.get();
+   foreach_list(node, &ir->body_instructions) {
+      ir_instruction *ir = (ir_instruction *)node;
 
       this->base_ir = ir;
       this->result = reg_undef;
@@ -1595,8 +1595,8 @@ fs_visitor::visit(ir_function *ir)
 
       assert(sig);
 
-      foreach_iter(exec_list_iterator, iter, sig->body) {
-	 ir_instruction *ir = (ir_instruction *)iter.get();
+      foreach_list(node, &sig->body) {
+	 ir_instruction *ir = (ir_instruction *)node;
 	 this->base_ir = ir;
 	 this->result = reg_undef;
 	 ir->accept(this);

From 652ef8569c923cf8e1e254dddc160c7995d258aa Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 22 Jul 2011 15:48:53 -0700
Subject: [PATCH 123/600] Revert "i965: Don't compute brw->wm.input_size_masks
 when it's unused."

This reverts commit 3412069e23b7fa5656262f3dd1aa86f66980594d.  We're
about to start using it in fragment shaders to handle avoiding
projection for fixed function.
---
 src/mesa/drivers/dri/i965/brw_vs_constval.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c
index 9fdfebe9f76..47cc0a7da7a 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_constval.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c
@@ -194,19 +194,11 @@ static void calc_wm_input_sizes( struct brw_context *brw )
    /* BRW_NEW_VERTEX_PROGRAM */
    const struct brw_vertex_program *vp =
       brw_vertex_program_const(brw->vertex_program);
-   /* BRW_NEW_FRAGMENT_PROGRAM */
-   struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram;
    /* BRW_NEW_INPUT_DIMENSIONS */
    struct tracker t;
    GLuint insn;
    GLuint i;
 
-   /* If we're going to go through brw_fs.cpp, we don't end up using
-    * brw->wm.input_size_masks.
-    */
-   if (prog && prog->_LinkedShaders[MESA_SHADER_FRAGMENT])
-      return;
-
    memset(&t, 0, sizeof(t));
 
    /* _NEW_LIGHT */
@@ -246,9 +238,7 @@ static void calc_wm_input_sizes( struct brw_context *brw )
 const struct brw_tracked_state brw_wm_input_sizes = {
    .dirty = {
       .mesa  = _NEW_LIGHT,
-      .brw   = (BRW_NEW_FRAGMENT_PROGRAM |
-		BRW_NEW_VERTEX_PROGRAM |
-		BRW_NEW_INPUT_DIMENSIONS),
+      .brw   = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS,
       .cache = 0
    },
    .prepare = calc_wm_input_sizes

From eb30820f268608cf451da32de69723036dddbc62 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 22 Jul 2011 15:56:46 -0700
Subject: [PATCH 124/600] i965/fs: Port texture projection avoidance
 optimization from the old backend.

This is part of fixing a ~1% performance regression in OpenArena when
changing the fixed function fragment shader to using the new backend.
Right now this just avoids the LINTERP of the projector, not the math
using it.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 15475fbae2f..9c3180fbc1c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -463,9 +463,21 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
 	 } else {
 	    /* Perspective interpolation case. */
 	    for (unsigned int k = 0; k < type->vector_elements; k++) {
-	       struct brw_reg interp = interp_reg(location, k);
-	       emit(FS_OPCODE_LINTERP, attr,
-		    this->delta_x, this->delta_y, fs_reg(interp));
+	       /* FINISHME: At some point we probably want to push
+		* this farther by giving similar treatment to the
+		* other potentially constant components of the
+		* attribute, as well as making brw_vs_constval.c
+		* handle varyings other than gl_TexCoord.
+		*/
+	       if (location >= FRAG_ATTRIB_TEX0 &&
+		   location <= FRAG_ATTRIB_TEX7 &&
+		   k == 3 && !(c->key.proj_attrib_mask & (1 << location))) {
+		  emit(BRW_OPCODE_MOV, attr, fs_reg(1.0f));
+	       } else {
+		  struct brw_reg interp = interp_reg(location, k);
+		  emit(FS_OPCODE_LINTERP, attr,
+		       this->delta_x, this->delta_y, fs_reg(interp));
+	       }
 	       attr.reg_offset++;
 	    }
 

From 6d8d6b41b85a18685351f3023a4cd41266ba9e68 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 22 Jul 2011 16:18:39 -0700
Subject: [PATCH 125/600] i965/fs: If we see a RCP of a constant, try to
 constant fold it.

---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 9c3180fbc1c..351d1dd283e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1056,6 +1056,20 @@ fs_visitor::propagate_constants()
 		  progress = true;
 	       }
 	       break;
+
+	    case FS_OPCODE_RCP:
+	       /* The hardware doesn't do math on immediate values
+		* (because why are you doing that, seriously?), but
+		* the correct answer is to just constant fold it
+		* anyway.
+		*/
+	       assert(i == 0);
+	       if (inst->src[0].imm.f != 0.0f) {
+		  scan_inst->opcode = BRW_OPCODE_MOV;
+		  scan_inst->src[0] = inst->src[0];
+		  progress = true;
+	       }
+	       break;
 	    }
 	 }
 

From a8b86459a1bb74cfdf0d63572a9fe194b2b5b53f Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 22 Jul 2011 16:45:15 -0700
Subject: [PATCH 126/600] i965/fs: Optimize a * 1.0 -> a.

This appears in our instruction stream as a result of the
brw_vs_constval.c handling.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 43 ++++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_fs.h   |  1 +
 2 files changed, 44 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 351d1dd283e..a9617c56e12 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1067,6 +1067,7 @@ fs_visitor::propagate_constants()
 	       if (inst->src[0].imm.f != 0.0f) {
 		  scan_inst->opcode = BRW_OPCODE_MOV;
 		  scan_inst->src[0] = inst->src[0];
+		  scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f;
 		  progress = true;
 	       }
 	       break;
@@ -1087,6 +1088,47 @@ fs_visitor::propagate_constants()
 
    return progress;
 }
+
+
+/**
+ * Attempts to move immediate constants into the immediate
+ * constant slot of following instructions.
+ *
+ * Immediate constants are a bit tricky -- they have to be in the last
+ * operand slot, you can't do abs/negate on them,
+ */
+
+bool
+fs_visitor::opt_algebraic()
+{
+   bool progress = false;
+
+   calculate_live_intervals();
+
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
+
+      switch (inst->opcode) {
+      case BRW_OPCODE_MUL:
+	 if (inst->src[1].file != IMM)
+	    continue;
+
+	 /* a * 1.0 = a */
+	 if (inst->src[1].type == BRW_REGISTER_TYPE_F &&
+	     inst->src[1].imm.f == 1.0) {
+	    inst->opcode = BRW_OPCODE_MOV;
+	    inst->src[1] = reg_undef;
+	    progress = true;
+	    break;
+	 }
+
+	 break;
+      }
+   }
+
+   return progress;
+}
+
 /**
  * Must be called after calculate_live_intervales() to remove unused
  * writes to registers -- register allocation will fail otherwise
@@ -1572,6 +1614,7 @@ fs_visitor::run()
 	 progress = remove_duplicate_mrf_writes() || progress;
 
 	 progress = propagate_constants() || progress;
+	 progress = opt_algebraic() || progress;
 	 progress = register_coalesce() || progress;
 	 progress = compute_to_mrf() || progress;
 	 progress = dead_code_eliminate() || progress;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 2bf850e5dea..89d6cda7e4f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -485,6 +485,7 @@ public:
    void setup_pull_constants();
    void calculate_live_intervals();
    bool propagate_constants();
+   bool opt_algebraic();
    bool register_coalesce();
    bool compute_to_mrf();
    bool dead_code_eliminate();

From f710b8c7501f29f5f8941e757ea1066cbeb03305 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 22 Jul 2011 16:52:54 -0700
Subject: [PATCH 127/600] i965/fs: Allow register coalescing where the source
 is a uniform.

Removes 0.8% of the fragment shader instructions on Unigine Tropics.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index a9617c56e12..e07798cebc1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1196,7 +1196,8 @@ fs_visitor::register_coalesce()
       if (inst->opcode != BRW_OPCODE_MOV ||
 	  inst->predicated ||
 	  inst->saturate ||
-	  inst->dst.file != GRF || inst->src[0].file != GRF ||
+	  inst->dst.file != GRF || (inst->src[0].file != GRF &&
+				    inst->src[0].file != UNIFORM)||
 	  inst->dst.type != inst->src[0].type)
 	 continue;
 
@@ -1218,7 +1219,8 @@ fs_visitor::register_coalesce()
 	       interfered = true;
 	       break;
 	    }
-	    if (scan_inst->dst.reg == inst->src[0].reg &&
+	    if (inst->src[0].file == GRF &&
+		scan_inst->dst.reg == inst->src[0].reg &&
 		(scan_inst->dst.reg_offset == inst->src[0].reg_offset ||
 		 scan_inst->is_tex())) {
 	       interfered = true;
@@ -1226,10 +1228,13 @@ fs_visitor::register_coalesce()
 	    }
 	 }
 
-	 /* The gen6 MATH instruction can't handle source modifiers, so avoid
-	  * coalescing those for now.  We should do something more specific.
+	 /* The gen6 MATH instruction can't handle source modifiers or
+	  * unusual register regions, so avoid coalescing those for
+	  * now.  We should do something more specific.
 	  */
-	 if (intel->gen >= 6 && scan_inst->is_math() && has_source_modifiers) {
+	 if (intel->gen >= 6 &&
+	     scan_inst->is_math() &&
+	     (has_source_modifiers || inst->src[0].file == UNIFORM)) {
 	    interfered = true;
 	    break;
 	 }
@@ -1248,11 +1253,10 @@ fs_visitor::register_coalesce()
 	    if (scan_inst->src[i].file == GRF &&
 		scan_inst->src[i].reg == inst->dst.reg &&
 		scan_inst->src[i].reg_offset == inst->dst.reg_offset) {
-	       scan_inst->src[i].reg = inst->src[0].reg;
-	       scan_inst->src[i].reg_offset = inst->src[0].reg_offset;
-	       scan_inst->src[i].abs |= inst->src[0].abs;
-	       scan_inst->src[i].negate ^= inst->src[0].negate;
-	       scan_inst->src[i].smear = inst->src[0].smear;
+	       fs_reg new_src = inst->src[0];
+	       new_src.negate ^= scan_inst->src[i].negate;
+	       new_src.abs |= scan_inst->src[i].abs;
+	       scan_inst->src[i] = new_src;
 	    }
 	 }
       }

From dc1f32deae1ab7366792fe5c7d654e02757985c0 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 29 Jul 2011 16:49:55 -0600
Subject: [PATCH 128/600] mesa: add missing breaks for
 GL_TEXTURE_CUBE_MAP_SEAMLESS queries

And fix indentation.

NOTE: This is a candidate for the 7.11 branch.
---
 src/mesa/main/texparam.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index 3f771f08bc6..134f15346e8 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -1266,12 +1266,13 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
          break;
 
       case GL_TEXTURE_CUBE_MAP_SEAMLESS:
-      if (ctx->Extensions.AMD_seamless_cubemap_per_texture) {
-         *params = (GLfloat) obj->Sampler.CubeMapSeamless;
-      }
-      else {
-         error = GL_TRUE;
-      }
+         if (ctx->Extensions.AMD_seamless_cubemap_per_texture) {
+            *params = (GLfloat) obj->Sampler.CubeMapSeamless;
+         }
+         else {
+            error = GL_TRUE;
+         }
+         break;
 
       default:
 	 error = GL_TRUE;
@@ -1441,6 +1442,7 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
          else {
             error = GL_TRUE;
          }
+         break;
 
       default:
          ; /* silence warnings */

From 120d71a45cfda1edfa8cd6b1732e209eb98b53d8 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 29 Jul 2011 16:49:55 -0600
Subject: [PATCH 129/600] mesa: minor comment changes in teximage.c

---
 src/mesa/main/teximage.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 302fd65cb27..27717cfb0f5 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -1,6 +1,5 @@
 /*
- * mesa 3-D graphics library
- * Version:  7.6
+ * Mesa 3-D graphics library
  *
  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
  * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
@@ -556,8 +555,6 @@ _mesa_tex_target_to_face(GLenum target)
  * \param target texture target.
  * \param level image level.
  * \param texImage texture image.
- * 
- * This was basically prompted by the introduction of cube maps.
  */
 void
 _mesa_set_tex_image(struct gl_texture_object *tObj,
@@ -709,15 +706,13 @@ get_proxy_target(GLenum target)
 
 /**
  * Get the texture object that corresponds to the target of the given
- * texture unit.
+ * texture unit.  The target should have already been checked for validity.
  *
  * \param ctx GL context.
  * \param texUnit texture unit.
  * \param target texture target.
  *
  * \return pointer to the texture object on success, or NULL on failure.
- * 
- * \sa gl_texture_unit.
  */
 struct gl_texture_object *
 _mesa_select_tex_object(struct gl_context *ctx,

From 200e4972c1579e8dfaa6f11eee2a7e54baad4852 Mon Sep 17 00:00:00 2001
From: Chad Versace <chad@chad-versace.us>
Date: Wed, 27 Jul 2011 12:21:27 -0700
Subject: [PATCH 130/600] glsl: Add method
 glsl_type::can_implicitly_convert_to()

This method checks if a source type is identical to or can be implicitly
converted to a target type according to the GLSL 1.20 spec, Section 4.1.10
Implicit Conversions.

The following commits use the method for a bugfix:
    glsl: Fix implicit conversions in non-constructor function calls
    glsl: Fix implicit conversions in array constructors

Note: This is a candidate for the 7.10 and 7.11 branches.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Chad Versace <chad@chad-versace.us>
---
 src/glsl/glsl_types.cpp | 16 ++++++++++++++++
 src/glsl/glsl_types.h   | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp
index a5e21bbb96c..c94aec0d2da 100644
--- a/src/glsl/glsl_types.cpp
+++ b/src/glsl/glsl_types.cpp
@@ -523,3 +523,19 @@ glsl_type::component_slots() const
       return 0;
    }
 }
+
+bool
+glsl_type::can_implicitly_convert_to(const glsl_type *desired) const
+{
+   if (this == desired)
+      return true;
+
+   /* There is no conversion among matrix types. */
+   if (this->matrix_columns > 1 || desired->matrix_columns > 1)
+      return false;
+
+   /* int and uint can be converted to float. */
+   return desired->is_float()
+          && this->is_integer()
+          && this->vector_elements == desired->vector_elements;
+}
diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h
index 87f57e7c756..048696693be 100644
--- a/src/glsl/glsl_types.h
+++ b/src/glsl/glsl_types.h
@@ -224,6 +224,41 @@ struct glsl_type {
     */
    unsigned component_slots() const;
 
+   /**
+    * \brief Can this type be implicitly converted to another?
+    *
+    * \return True if the types are identical or if this type can be converted
+    *         to \c desired according to Section 4.1.10 of the GLSL spec.
+    *
+    * \verbatim
+    * From page 25 (31 of the pdf) of the GLSL 1.50 spec, Section 4.1.10
+    * Implicit Conversions:
+    *
+    *     In some situations, an expression and its type will be implicitly
+    *     converted to a different type. The following table shows all allowed
+    *     implicit conversions:
+    *
+    *     Type of expression | Can be implicitly converted to
+    *     --------------------------------------------------
+    *     int                  float
+    *     uint
+    *
+    *     ivec2                vec2
+    *     uvec2
+    *
+    *     ivec3                vec3
+    *     uvec3
+    *
+    *     ivec4                vec4
+    *     uvec4
+    *
+    *     There are no implicit array or structure conversions. For example,
+    *     an array of int cannot be implicitly converted to an array of float.
+    *     There are no implicit conversions between signed and unsigned
+    *     integers.
+    * \endverbatim
+    */
+   bool can_implicitly_convert_to(const glsl_type *desired) const;
 
    /**
     * Query whether or not a type is a scalar (non-vector and non-matrix).

From 8b3627fd7b52723102f070957d87f98073e92d7c Mon Sep 17 00:00:00 2001
From: Chad Versace <chad@chad-versace.us>
Date: Wed, 27 Jul 2011 12:31:10 -0700
Subject: [PATCH 131/600] glsl: Fix implicit conversions in non-constructor
 function calls

Context
-------
In ast_function_expression::hir(), parameter_lists_match() checks if the
function call's actual parameter list matches the signature's parameter
list, where the match may require implicit conversion of some arguments.
To check if an implicit conversion exists between individual arguments,
type_compare() is used.

Problems
--------
type_compare() allowed the following illegal implicit conversions:
    bool -> float
    bvecN -> vecN

    int -> uint
    ivecN -> uvecN

    uint -> int
    uvecN -> ivecN

Change
------
type_compare() is buggy, so replace it with glsl_type::can_be_implicitly_converted_to().
This comprises a rewrite of parameter_lists_match().

Fixes piglit:spec/glsl-1.20/compiler/built-in-functions/outerProduct-bvec*.vert

Note: This is a candidate for the 7.10 and 7.11 branches.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Chad Versace <chad@chad-versace.us>
---
 src/glsl/ir_function.cpp | 46 +++++++++++++++++++++++++---------------
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp
index 0f2f1a0eea4..eca0079c166 100644
--- a/src/glsl/ir_function.cpp
+++ b/src/glsl/ir_function.cpp
@@ -85,12 +85,25 @@ type_compare(const glsl_type *a, const glsl_type *b)
 }
 
 
+/**
+ * \brief Check if two parameter lists match.
+ *
+ * \param list_a Parameters of the function definition.
+ * \param list_b Actual parameters passed to the function.
+ * \return If an exact match, return 0.
+ *         If an inexact match requiring implicit conversion, return 1.
+ *         If not a match, return -1.
+ * \see matching_signature()
+ */
 static int
 parameter_lists_match(const exec_list *list_a, const exec_list *list_b)
 {
    const exec_node *node_a = list_a->head;
    const exec_node *node_b = list_b->head;
-   int total_score = 0;
+
+   /* This is set to true if there is an inexact match requiring an implicit
+    * conversion. */
+   bool inexact_match = false;
 
    for (/* empty */
 	; !node_a->is_tail_sentinel()
@@ -106,12 +119,11 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b)
       const ir_variable *const param = (ir_variable *) node_a;
       const ir_instruction *const actual = (ir_instruction *) node_b;
 
-      /* Determine whether or not the types match.  If the types are an
-       * exact match, the match score is zero.  If the types don't match
-       * but the actual parameter can be coerced to the type of the declared
-       * parameter, the match score is one.
-       */
-      int score;
+      if (param->type == actual->type)
+	 continue;
+
+      /* Try to find an implicit conversion from actual to param. */
+      inexact_match = true;
       switch ((enum ir_variable_mode)(param->mode)) {
       case ir_var_auto:
       case ir_var_uniform:
@@ -125,11 +137,13 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b)
 
       case ir_var_const_in:
       case ir_var_in:
-	 score = type_compare(param->type, actual->type);
+	 if (!actual->type->can_implicitly_convert_to(param->type))
+	    return -1;
 	 break;
 
       case ir_var_out:
-	 score = type_compare(actual->type, param->type);
+	 if (!param->type->can_implicitly_convert_to(actual->type))
+	    return -1;
 	 break;
 
       case ir_var_inout:
@@ -137,17 +151,12 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b)
 	  * there is int -> float but no float -> int), inout parameters must
 	  * be exact matches.
 	  */
-	 score = (type_compare(actual->type, param->type) == 0) ? 0 : -1;
-	 break;
+	 return -1;
 
       default:
 	 assert(false);
-      }
-
-      if (score < 0)
 	 return -1;
-
-      total_score += score;
+      }
    }
 
    /* If all of the parameters from the other parameter list have been
@@ -157,7 +166,10 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b)
    if (!node_b->is_tail_sentinel())
       return -1;
 
-   return total_score;
+   if (inexact_match)
+      return 1;
+   else
+      return 0;
 }
 
 

From 6efe1a849586e46028c1eb763175904166ec7076 Mon Sep 17 00:00:00 2001
From: Chad Versace <chad@chad-versace.us>
Date: Wed, 27 Jul 2011 12:32:10 -0700
Subject: [PATCH 132/600] glsl: Remove ir_function.cpp:type_compare()

The function is no longer used and has been replaced by
glsl_type::can_implicitly_convert_to().

Note: This is a candidate for the 7.10 and 7.11 branches.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Chad Versace <chad@chad-versace.us>
---
 src/glsl/ir_function.cpp | 61 ----------------------------------------
 1 file changed, 61 deletions(-)

diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp
index eca0079c166..dd63e3078f8 100644
--- a/src/glsl/ir_function.cpp
+++ b/src/glsl/ir_function.cpp
@@ -24,67 +24,6 @@
 #include "glsl_types.h"
 #include "ir.h"
 
-int
-type_compare(const glsl_type *a, const glsl_type *b)
-{
-   /* If the types are the same, they trivially match.
-    */
-   if (a == b)
-      return 0;
-
-   switch (a->base_type) {
-   case GLSL_TYPE_UINT:
-   case GLSL_TYPE_INT:
-   case GLSL_TYPE_BOOL:
-      /* There is no implicit conversion to or from integer types or bool.
-       */
-      if ((a->is_integer() != b->is_integer())
-	  || (a->is_boolean() != b->is_boolean()))
-	 return -1;
-
-      /* FALLTHROUGH */
-
-   case GLSL_TYPE_FLOAT:
-      if ((a->vector_elements != b->vector_elements)
-	  || (a->matrix_columns != b->matrix_columns))
-	 return -1;
-
-      return 1;
-
-   case GLSL_TYPE_SAMPLER:
-   case GLSL_TYPE_STRUCT:
-      /* Samplers and structures must match exactly.
-       */
-      return -1;
-
-   case GLSL_TYPE_ARRAY:
-      if ((b->base_type != GLSL_TYPE_ARRAY)
-	  || (a->length != b->length))
-	 return -1;
-
-      /* From GLSL 1.50 spec, page 27 (page 33 of the PDF):
-       *    "There are no implicit array or structure conversions."
-       *
-       * If the comparison of the array element types detects that a conversion
-       * would be required, the array types do not match.
-       */
-      return (type_compare(a->fields.array, b->fields.array) == 0) ? 0 : -1;
-
-   case GLSL_TYPE_VOID:
-   case GLSL_TYPE_ERROR:
-   default:
-      /* These are all error conditions.  It is invalid for a parameter to
-       * a function to be declared as error, void, or a function.
-       */
-      return -1;
-   }
-
-   /* This point should be unreachable.
-    */
-   assert(0);
-}
-
-
 /**
  * \brief Check if two parameter lists match.
  *

From a5ab9398e34287ed8cbb010d0758790e6692530c Mon Sep 17 00:00:00 2001
From: Chad Versace <chad@chad-versace.us>
Date: Wed, 27 Jul 2011 13:00:02 -0700
Subject: [PATCH 133/600] glsl: Fix conversions in array constructors

Array constructors obey narrower conversion rules than other constructors
[1] --- they use the implicit conversion rules [2] instead of the scalar
constructor conversions [3].  But process_array_constructor() was
incorrectly applying the broader rules.

[1] GLSL 1.50 spec, Section 5.4.4 Array Constructors, page 52 (58 of pdf)
[2] GLSL 1.50 spec, Section 4.1.10 Implicit Conversions, page 25 (31 of pdf)
[3] GLSL 1.50 spec, Section 5.4.1 Conversion, page 48 (54 of pdf)

To fix this, first check (with glsl_type::can_be_implicitly_converted_to)
if an implicit conversion is legal before performing the conversion.

Fixes:
piglit:spec/glsl-1.20/compiler/structure-and-array-operations/array-ctor-implicit-conversion-bool-float.vert
piglit:spec/glsl-1.20/compiler/structure-and-array-operations/array-ctor-implicit-conversion-bvec*-vec*.vert

Note: This is a candidate for the 7.10 and 7.11 branches.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Chad Versace <chad@chad-versace.us>
---
 src/glsl/ast_function.cpp | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
index bdb73f48706..8bcf48dfd91 100644
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -442,13 +442,21 @@ process_array_constructor(exec_list *instructions,
       ir_rvalue *ir = (ir_rvalue *) n;
       ir_rvalue *result = ir;
 
-      /* Apply implicit conversions (not the scalar constructor rules!) */
+      /* Apply implicit conversions (not the scalar constructor rules!). See
+       * the spec quote above. */
       if (constructor_type->element_type()->is_float()) {
 	 const glsl_type *desired_type =
 	    glsl_type::get_instance(GLSL_TYPE_FLOAT,
 				    ir->type->vector_elements,
 				    ir->type->matrix_columns);
-	 result = convert_component(ir, desired_type);
+	 if (result->type->can_implicitly_convert_to(desired_type)) {
+	    /* Even though convert_component() implements the constructor
+	     * conversion rules (not the implicit conversion rules), its safe
+	     * to use it here because we already checked that the implicit
+	     * conversion is legal.
+	     */
+	    result = convert_component(ir, desired_type);
+	 }
       }
 
       if (result->type != constructor_type->element_type()) {

From 5081d31a0ed753e7e23c5ed51f572d38aef66bfe Mon Sep 17 00:00:00 2001
From: Chad Versace <chad@chad-versace.us>
Date: Wed, 27 Jul 2011 12:37:51 -0700
Subject: [PATCH 134/600] glsl: Clarify ir_function::matching_sigature()

The function used a variable named 'score', which was an outright lie.
A signature matches or it doesn't; there is no fuzzy scoring.

Change the return type of parameter_lists_match() to an enum, and
let ir_function::matching_sigature() switch on that enum.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Chad Versace <chad@chad-versace.us>
---
 src/glsl/ir_function.cpp | 53 +++++++++++++++++++++++++---------------
 1 file changed, 33 insertions(+), 20 deletions(-)

diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp
index dd63e3078f8..6cfc32cc2a0 100644
--- a/src/glsl/ir_function.cpp
+++ b/src/glsl/ir_function.cpp
@@ -24,17 +24,28 @@
 #include "glsl_types.h"
 #include "ir.h"
 
+typedef enum {
+   PARAMETER_LIST_NO_MATCH,
+   PARAMETER_LIST_EXACT_MATCH,
+   PARAMETER_LIST_INEXACT_MATCH, /*< Match requires implicit conversion. */
+} parameter_list_match_t;
+
 /**
  * \brief Check if two parameter lists match.
  *
  * \param list_a Parameters of the function definition.
  * \param list_b Actual parameters passed to the function.
- * \return If an exact match, return 0.
- *         If an inexact match requiring implicit conversion, return 1.
- *         If not a match, return -1.
  * \see matching_signature()
  */
-static int
+
+/**
+ * \brief Check if two parameter lists match.
+ *
+ * \param list_a Parameters of the function definition.
+ * \param list_b Actual parameters passed to the function.
+ * \see matching_signature()
+ */
+static parameter_list_match_t
 parameter_lists_match(const exec_list *list_a, const exec_list *list_b)
 {
    const exec_node *node_a = list_a->head;
@@ -52,7 +63,7 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b)
        * do not match.
        */
       if (node_b->is_tail_sentinel())
-	 return -1;
+	 return PARAMETER_LIST_NO_MATCH;
 
 
       const ir_variable *const param = (ir_variable *) node_a;
@@ -72,17 +83,17 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b)
 	  * as uniform.
 	  */
 	 assert(0);
-	 return -1;
+	 return PARAMETER_LIST_NO_MATCH;
 
       case ir_var_const_in:
       case ir_var_in:
 	 if (!actual->type->can_implicitly_convert_to(param->type))
-	    return -1;
+	    return PARAMETER_LIST_NO_MATCH;
 	 break;
 
       case ir_var_out:
 	 if (!param->type->can_implicitly_convert_to(actual->type))
-	    return -1;
+	    return PARAMETER_LIST_NO_MATCH;
 	 break;
 
       case ir_var_inout:
@@ -90,11 +101,11 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b)
 	  * there is int -> float but no float -> int), inout parameters must
 	  * be exact matches.
 	  */
-	 return -1;
+	 return PARAMETER_LIST_NO_MATCH;
 
       default:
 	 assert(false);
-	 return -1;
+	 return PARAMETER_LIST_NO_MATCH;
       }
    }
 
@@ -103,12 +114,12 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b)
     * match.
     */
    if (!node_b->is_tail_sentinel())
-      return -1;
+      return PARAMETER_LIST_NO_MATCH;
 
    if (inexact_match)
-      return 1;
+      return PARAMETER_LIST_INEXACT_MATCH;
    else
-      return 0;
+      return PARAMETER_LIST_EXACT_MATCH;
 }
 
 
@@ -132,18 +143,20 @@ ir_function::matching_signature(const exec_list *actual_parameters)
       ir_function_signature *const sig =
 	 (ir_function_signature *) iter.get();
 
-      const int score = parameter_lists_match(& sig->parameters,
-					      actual_parameters);
-
-      /* If we found an exact match, simply return it */
-      if (score == 0)
+      switch (parameter_lists_match(& sig->parameters, actual_parameters)) {
+      case PARAMETER_LIST_EXACT_MATCH:
 	 return sig;
-
-      if (score > 0) {
+      case PARAMETER_LIST_INEXACT_MATCH:
 	 if (match == NULL)
 	    match = sig;
 	 else
 	    multiple_inexact_matches = true;
+	 continue;
+      case PARAMETER_LIST_NO_MATCH:
+	 continue;
+      default:
+	 assert(false);
+	 return NULL;
       }
    }
 

From e737a99a6fbafe3ba4b5175eea25d1598dbeb9d8 Mon Sep 17 00:00:00 2001
From: Jeremy Huddleston <jeremyhu@apple.com>
Date: Sun, 31 Jul 2011 09:21:56 -0700
Subject: [PATCH 135/600] Fix PPC detection on darwin

Fixes regression introduced by 7004582c1894ede839c44e292b413fe4916d7e9e

Signed-off-by: Jeremy Huddleston <jeremyhu@apple.com>
---
 src/gallium/include/pipe/p_config.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h
index eea3d79e64b..803b806592c 100644
--- a/src/gallium/include/pipe/p_config.h
+++ b/src/gallium/include/pipe/p_config.h
@@ -99,9 +99,9 @@
 #endif
 #endif
 
-#if defined(__PPC__)
+#if defined(__ppc__) || defined(__ppc64__) || defined(__PPC__)
 #define PIPE_ARCH_PPC
-#if defined(__PPC64__)
+#if defined(__ppc64__) || defined(__PPC64__)
 #define PIPE_ARCH_PPC_64
 #endif
 #endif

From 5b3c7199830b8eaac4df2f8c3f10d0e89b4bd5c5 Mon Sep 17 00:00:00 2001
From: Jeremy Huddleston <jeremyhu@apple.com>
Date: Sun, 31 Jul 2011 09:31:48 -0700
Subject: [PATCH 136/600] darwin: Use machine/endian.h to determine endianness

Signed-off-by: Jeremy Huddleston <jeremyhu@apple.com>
---
 src/gallium/include/pipe/p_config.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h
index 803b806592c..8a5d892c884 100644
--- a/src/gallium/include/pipe/p_config.h
+++ b/src/gallium/include/pipe/p_config.h
@@ -120,6 +120,15 @@
 # define PIPE_ARCH_BIG_ENDIAN
 #endif
 
+#elif defined(__APPLE__)
+#include <machine/endian.h>
+
+#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
+# define PIPE_ARCH_LITTLE_ENDIAN
+#elif __DARWIN_BYTE_ORDER == __DARWIN_BIG_ENDIAN
+# define PIPE_ARCH_BIG_ENDIAN
+#endif
+
 #else
 
 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)

From 5541920e0ac4ea8383c7f896daba24a304aafec6 Mon Sep 17 00:00:00 2001
From: Chad Versace <chad@chad-versace.us>
Date: Mon, 1 Aug 2011 09:36:08 -0700
Subject: [PATCH 137/600] glsl: Remove duplicate comment

Remove duplicate doxgen comment for
ir_function.cpp:parameter_lists_match().

Signed-off-by: Chad Versace <chad@chad-versace.us>
---
 src/glsl/ir_function.cpp | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp
index 6cfc32cc2a0..2a4de5b0dcd 100644
--- a/src/glsl/ir_function.cpp
+++ b/src/glsl/ir_function.cpp
@@ -30,14 +30,6 @@ typedef enum {
    PARAMETER_LIST_INEXACT_MATCH, /*< Match requires implicit conversion. */
 } parameter_list_match_t;
 
-/**
- * \brief Check if two parameter lists match.
- *
- * \param list_a Parameters of the function definition.
- * \param list_b Actual parameters passed to the function.
- * \see matching_signature()
- */
-
 /**
  * \brief Check if two parameter lists match.
  *

From d4c80f5f85c749df3fc091ff07b60ef4989fa6d9 Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Wed, 27 Jul 2011 14:34:12 -0700
Subject: [PATCH 138/600] glsl: improve the accuracy of the asin() builtin
 function.

The previous formula for asin(x) was algebraically equivalent to:

sign(x)*(pi/2 - sqrt(1-|x|)*(A + B|x| + C|x|^2))

where A, B, and C were arbitrary constants determined by a curve fit.

This formula had a worst case absolute error of 0.00448, an unbounded
worst case relative error, and a discontinuity near x=0.

Changed the formula to:

sign(x)*(pi/2 - sqrt(1-|x|)*(pi/2 + (pi/4-1)|x| + A|x|^2 + B|x|^3))

where A and B are arbitrary constants determined by a curve fit.  This
has a worst case absolute error of 0.00039, a worst case relative
error of 0.000405, and no discontinuities.

I don't expect a significant performance degradation, since the extra
multiply-accumulate should be fast compared to the sqrt() computation.

Fixes piglit tests {vs,fs}-asin-float and {vs,fs}-atan-*
---
 src/glsl/builtins/ir/asin | 68 +++++++++++++++++++++++----------------
 1 file changed, 40 insertions(+), 28 deletions(-)

diff --git a/src/glsl/builtins/ir/asin b/src/glsl/builtins/ir/asin
index e230ad614ee..45d9e672958 100644
--- a/src/glsl/builtins/ir/asin
+++ b/src/glsl/builtins/ir/asin
@@ -5,23 +5,26 @@
      ((return (expression float *
 	       (expression float sign (var_ref x))
 	       (expression float -
-		(expression float *
-		 (constant float (3.1415926))
-		 (constant float (0.5)))
+		(constant float (1.5707964))
 		(expression float *
 		 (expression float sqrt
 		  (expression float -
 		   (constant float (1.0))
 		   (expression float abs (var_ref x))))
 		 (expression float +
-		  (constant float (1.5707288))
+		  (constant float (1.5707964))
 		  (expression float *
 		   (expression float abs (var_ref x))
 		   (expression float +
-		    (constant float (-0.2121144))
+		    (constant float (-0.21460183))
 		    (expression float *
-		     (constant float (0.0742610))
-		     (expression float abs (var_ref x))))))))))))
+		     (expression float abs (var_ref x))
+                     (expression float +
+                      (constant float (0.086566724))
+                      (expression float *
+                       (expression float abs (var_ref x))
+                       (constant float (-0.03102955))
+		     ))))))))))))
 
    (signature vec2
      (parameters
@@ -29,23 +32,26 @@
      ((return (expression vec2 *
 	       (expression vec2 sign (var_ref x))
 	       (expression vec2 -
-		(expression float *
-		 (constant float (3.1415926))
-		 (constant float (0.5)))
+		(constant float (1.5707964))
 		(expression vec2 *
 		 (expression vec2 sqrt
 		  (expression vec2 -
 		   (constant float (1.0))
 		   (expression vec2 abs (var_ref x))))
 		 (expression vec2 +
-		  (constant float (1.5707288))
+		  (constant float (1.5707964))
 		  (expression vec2 *
 		   (expression vec2 abs (var_ref x))
 		   (expression vec2 +
-		    (constant float (-0.2121144))
+		    (constant float (-0.21460183))
 		    (expression vec2 *
-		     (constant float (0.0742610))
-		     (expression vec2 abs (var_ref x))))))))))))
+		     (expression vec2 abs (var_ref x))
+                     (expression vec2 +
+                      (constant float (0.086566724))
+                      (expression vec2 *
+                       (expression vec2 abs (var_ref x))
+                       (constant float (-0.03102955))
+		     ))))))))))))
 
    (signature vec3
      (parameters
@@ -53,23 +59,26 @@
      ((return (expression vec3 *
 	       (expression vec3 sign (var_ref x))
 	       (expression vec3 -
-		(expression float *
-		 (constant float (3.1415926))
-		 (constant float (0.5)))
+		(constant float (1.5707964))
 		(expression vec3 *
 		 (expression vec3 sqrt
 		  (expression vec3 -
 		   (constant float (1.0))
 		   (expression vec3 abs (var_ref x))))
 		 (expression vec3 +
-		  (constant float (1.5707288))
+		  (constant float (1.5707964))
 		  (expression vec3 *
 		   (expression vec3 abs (var_ref x))
 		   (expression vec3 +
-		    (constant float (-0.2121144))
+		    (constant float (-0.21460183))
 		    (expression vec3 *
-		     (constant float (0.0742610))
-		     (expression vec3 abs (var_ref x))))))))))))
+		     (expression vec3 abs (var_ref x))
+                     (expression vec3 +
+                      (constant float (0.086566724))
+                      (expression vec3 *
+                       (expression vec3 abs (var_ref x))
+                       (constant float (-0.03102955))
+		     ))))))))))))
 
    (signature vec4
      (parameters
@@ -77,21 +86,24 @@
      ((return (expression vec4 *
 	       (expression vec4 sign (var_ref x))
 	       (expression vec4 -
-		(expression float *
-		 (constant float (3.1415926))
-		 (constant float (0.5)))
+		(constant float (1.5707964))
 		(expression vec4 *
 		 (expression vec4 sqrt
 		  (expression vec4 -
 		   (constant float (1.0))
 		   (expression vec4 abs (var_ref x))))
 		 (expression vec4 +
-		  (constant float (1.5707288))
+		  (constant float (1.5707964))
 		  (expression vec4 *
 		   (expression vec4 abs (var_ref x))
 		   (expression vec4 +
-		    (constant float (-0.2121144))
+		    (constant float (-0.21460183))
 		    (expression vec4 *
-		     (constant float (0.0742610))
-		     (expression vec4 abs (var_ref x))))))))))))
+		     (expression vec4 abs (var_ref x))
+                     (expression vec4 +
+                      (constant float (0.086566724))
+                      (expression vec4 *
+                       (expression vec4 abs (var_ref x))
+                       (constant float (-0.03102955))
+		     ))))))))))))
 ))

From b1b4ea0b3679db0b8fddaa9663a10d4712bba3b7 Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Wed, 27 Jul 2011 15:53:31 -0700
Subject: [PATCH 139/600] glsl: improve the accuracy of the atan(x,y) builtin
 function.

The previous formula for atan(x,y) returned a value of +/- pi whenever
|x|<0.0001, and used a formula based on atan(y/x) otherwise.  This
broke in cases where both x and y were small (e.g. atan(1e-5, 1e-5)).

This patch modifies the formula so that it returns a value of +/- pi
whenever |x|<1e-8*|y|, and uses the formula based on atan(y/x)
otherwise.
---
 src/glsl/builtins/ir/atan | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/glsl/builtins/ir/atan b/src/glsl/builtins/ir/atan
index cfecc1f1749..7b5ea13c6ba 100644
--- a/src/glsl/builtins/ir/atan
+++ b/src/glsl/builtins/ir/atan
@@ -54,7 +54,9 @@
     )
     (
       (declare () float r)
-      (if (expression bool > (expression float abs (var_ref x)) (constant float (0.000100))) (
+      (if (expression bool >
+           (expression float abs (var_ref x))
+           (expression float * (constant float (1.0e-8)) (expression float abs (var_ref y)))) (
         (assign (x) (var_ref r) (call atan ((expression float / (var_ref y) (var_ref x)))))
         (if (expression bool < (var_ref x) (constant float (0.000000)) ) (
           (if (expression bool >= (var_ref y) (constant float (0.000000)) )

From f379d8f73063a4c4d6cf379318c6b37118d46bfa Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Mon, 25 Apr 2011 23:37:47 -0500
Subject: [PATCH 140/600] st/mesa: Add a GLSL IR to TGSI translator.

It is still a work in progress at this point, but it produces working and
reasonably well-optimized code.

Originally based on ir_to_mesa and st_mesa_to_tgsi, but does not directly use
Mesa IR instructions in TGSI generation, instead generating TGSI from the
intermediate class glsl_to_tgsi_instruction.  It also has new optimization
passes to replace _mesa_optimize_program.
---
 src/mesa/sources.mak                       |    3 +-
 src/mesa/state_tracker/st_cb_program.c     |   14 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 4431 ++++++++++++++++++++
 src/mesa/state_tracker/st_glsl_to_tgsi.h   |   66 +
 src/mesa/state_tracker/st_mesa_to_tgsi.c   |    4 +-
 src/mesa/state_tracker/st_mesa_to_tgsi.h   |    6 +
 src/mesa/state_tracker/st_program.c        |  399 +-
 src/mesa/state_tracker/st_program.h        |   27 +
 8 files changed, 4767 insertions(+), 183 deletions(-)
 create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 create mode 100644 src/mesa/state_tracker/st_glsl_to_tgsi.h

diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak
index 4b2ec08bbb0..ed008f8813e 100644
--- a/src/mesa/sources.mak
+++ b/src/mesa/sources.mak
@@ -336,7 +336,8 @@ MESA_GALLIUM_SOURCES = \
 
 MESA_GALLIUM_CXX_SOURCES = \
 	$(MAIN_CXX_SOURCES) \
-	$(SHADER_CXX_SOURCES)
+	$(SHADER_CXX_SOURCES) \
+	state_tracker/st_glsl_to_tgsi.cpp
 
 # All the core C sources, for dependency checking
 ALL_SOURCES = \
diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c
index 32694975d17..2abb4d8f082 100644
--- a/src/mesa/state_tracker/st_cb_program.c
+++ b/src/mesa/state_tracker/st_cb_program.c
@@ -44,6 +44,7 @@
 #include "st_program.h"
 #include "st_mesa_to_tgsi.h"
 #include "st_cb_program.h"
+#include "st_glsl_to_tgsi.h"
 
 
 
@@ -129,6 +130,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog)
       {
          struct st_vertex_program *stvp = (struct st_vertex_program *) prog;
          st_release_vp_variants( st, stvp );
+         
+         if (stvp->glsl_to_tgsi)
+            free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi);
       }
       break;
    case MESA_GEOMETRY_PROGRAM:
@@ -137,6 +141,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog)
             (struct st_geometry_program *) prog;
 
          st_release_gp_variants(st, stgp);
+         
+         if (stgp->glsl_to_tgsi)
+            free_glsl_to_tgsi_visitor(stgp->glsl_to_tgsi);
 
          if (stgp->tgsi.tokens) {
             st_free_tokens((void *) stgp->tgsi.tokens);
@@ -151,6 +158,9 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog)
 
          st_release_fp_variants(st, stfp);
          
+         if (stfp->glsl_to_tgsi)
+            free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
+         
          if (stfp->tgsi.tokens) {
             st_free_tokens(stfp->tgsi.tokens);
             stfp->tgsi.tokens = NULL;
@@ -242,4 +252,8 @@ st_init_program_functions(struct dd_function_table *functions)
    functions->DeleteProgram = st_delete_program;
    functions->IsProgramNative = st_is_program_native;
    functions->ProgramStringNotify = st_program_string_notify;
+   
+   functions->NewShader = st_new_shader;
+   functions->NewShaderProgram = st_new_shader_program;
+   functions->LinkShader = st_link_shader;
 }
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
new file mode 100644
index 00000000000..e1102503ee0
--- /dev/null
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -0,0 +1,4431 @@
+/*
+ * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
+ * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
+ * Copyright © 2010 Intel Corporation
+ * Copyright © 2011 Bryan Cain
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file glsl_to_tgsi.cpp
+ *
+ * Translate GLSL IR to Mesa's gl_program representation and to TGSI.
+ */
+
+#include <stdio.h>
+#include "main/compiler.h"
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_print_visitor.h"
+#include "ir_expression_flattening.h"
+#include "glsl_types.h"
+#include "glsl_parser_extras.h"
+#include "../glsl/program.h"
+#include "ir_optimization.h"
+#include "ast.h"
+
+extern "C" {
+#include "main/mtypes.h"
+#include "main/shaderapi.h"
+#include "main/shaderobj.h"
+#include "main/uniforms.h"
+#include "program/hash_table.h"
+#include "program/prog_instruction.h"
+#include "program/prog_optimize.h"
+#include "program/prog_print.h"
+#include "program/program.h"
+#include "program/prog_uniform.h"
+#include "program/prog_parameter.h"
+#include "program/sampler.h"
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_context.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_state.h"
+#include "util/u_math.h"
+#include "tgsi/tgsi_ureg.h"
+#include "tgsi/tgsi_dump.h"
+#include "st_context.h"
+#include "st_program.h"
+#include "st_glsl_to_tgsi.h"
+#include "st_mesa_to_tgsi.h"
+
+#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) |  \
+                           (1 << PROGRAM_ENV_PARAM) |    \
+                           (1 << PROGRAM_STATE_VAR) |    \
+                           (1 << PROGRAM_NAMED_PARAM) |  \
+                           (1 << PROGRAM_CONSTANT) |     \
+                           (1 << PROGRAM_UNIFORM))
+}
+
+class st_src_reg;
+class st_dst_reg;
+
+static int swizzle_for_size(int size);
+
+/**
+ * This struct is a corresponding struct to Mesa prog_src_register, with
+ * wider fields.
+ */
+class st_src_reg {
+public:
+   st_src_reg(gl_register_file file, int index, const glsl_type *type)
+   {
+      this->file = file;
+      this->index = index;
+      if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
+         this->swizzle = swizzle_for_size(type->vector_elements);
+      else
+         this->swizzle = SWIZZLE_XYZW;
+      this->negate = 0;
+      this->reladdr = NULL;
+   }
+
+   st_src_reg()
+   {
+      this->file = PROGRAM_UNDEFINED;
+      this->index = 0;
+      this->swizzle = 0;
+      this->negate = 0;
+      this->reladdr = NULL;
+   }
+
+   explicit st_src_reg(st_dst_reg reg);
+
+   gl_register_file file; /**< PROGRAM_* from Mesa */
+   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
+   GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
+   int negate; /**< NEGATE_XYZW mask from mesa */
+   /** Register index should be offset by the integer in this reg. */
+   st_src_reg *reladdr;
+};
+
+class st_dst_reg {
+public:
+   st_dst_reg(gl_register_file file, int writemask)
+   {
+      this->file = file;
+      this->index = 0;
+      this->writemask = writemask;
+      this->cond_mask = COND_TR;
+      this->reladdr = NULL;
+   }
+
+   st_dst_reg()
+   {
+      this->file = PROGRAM_UNDEFINED;
+      this->index = 0;
+      this->writemask = 0;
+      this->cond_mask = COND_TR;
+      this->reladdr = NULL;
+   }
+
+   explicit st_dst_reg(st_src_reg reg);
+
+   gl_register_file file; /**< PROGRAM_* from Mesa */
+   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
+   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
+   GLuint cond_mask:4;
+   /** Register index should be offset by the integer in this reg. */
+   st_src_reg *reladdr;
+};
+
+st_src_reg::st_src_reg(st_dst_reg reg)
+{
+   this->file = reg.file;
+   this->index = reg.index;
+   this->swizzle = SWIZZLE_XYZW;
+   this->negate = 0;
+   this->reladdr = NULL;
+}
+
+st_dst_reg::st_dst_reg(st_src_reg reg)
+{
+   this->file = reg.file;
+   this->index = reg.index;
+   this->writemask = WRITEMASK_XYZW;
+   this->cond_mask = COND_TR;
+   this->reladdr = reg.reladdr;
+}
+
+class glsl_to_tgsi_instruction : public exec_node {
+public:
+   /* Callers of this ralloc-based new need not call delete. It's
+    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      node = rzalloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   enum prog_opcode op;
+   st_dst_reg dst;
+   st_src_reg src[3];
+   /** Pointer to the ir source this tree came from for debugging */
+   ir_instruction *ir;
+   GLboolean cond_update;
+   bool saturate;
+   int sampler; /**< sampler index */
+   int tex_target; /**< One of TEXTURE_*_INDEX */
+   GLboolean tex_shadow;
+
+   class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */
+};
+
+class variable_storage : public exec_node {
+public:
+   variable_storage(ir_variable *var, gl_register_file file, int index)
+      : file(file), index(index), var(var)
+   {
+      /* empty */
+   }
+
+   gl_register_file file;
+   int index;
+   ir_variable *var; /* variable that maps to this, if any */
+};
+
+class function_entry : public exec_node {
+public:
+   ir_function_signature *sig;
+
+   /**
+    * identifier of this function signature used by the program.
+    *
+    * At the point that Mesa instructions for function calls are
+    * generated, we don't know the address of the first instruction of
+    * the function body.  So we make the BranchTarget that is called a
+    * small integer and rewrite them during set_branchtargets().
+    */
+   int sig_id;
+
+   /**
+    * Pointer to first instruction of the function body.
+    *
+    * Set during function body emits after main() is processed.
+    */
+   glsl_to_tgsi_instruction *bgn_inst;
+
+   /**
+    * Index of the first instruction of the function body in actual
+    * Mesa IR.
+    *
+    * Set after convertion from glsl_to_tgsi_instruction to prog_instruction.
+    */
+   int inst;
+
+   /** Storage for the return value. */
+   st_src_reg return_reg;
+};
+
+class glsl_to_tgsi_visitor : public ir_visitor {
+public:
+   glsl_to_tgsi_visitor();
+   ~glsl_to_tgsi_visitor();
+
+   function_entry *current_function;
+
+   struct gl_context *ctx;
+   struct gl_program *prog;
+   struct gl_shader_program *shader_program;
+   struct gl_shader_compiler_options *options;
+
+   int next_temp;
+   
+   int num_address_regs;
+   bool indirect_addr_temps;
+   bool indirect_addr_consts;
+
+   variable_storage *find_variable_storage(ir_variable *var);
+
+   function_entry *get_function_signature(ir_function_signature *sig);
+
+   st_src_reg get_temp(const glsl_type *type);
+   void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
+
+   st_src_reg st_src_reg_for_float(float val);
+
+   /**
+    * \name Visit methods
+    *
+    * As typical for the visitor pattern, there must be one \c visit method for
+    * each concrete subclass of \c ir_instruction.  Virtual base classes within
+    * the hierarchy should not have \c visit methods.
+    */
+   /*@{*/
+   virtual void visit(ir_variable *);
+   virtual void visit(ir_loop *);
+   virtual void visit(ir_loop_jump *);
+   virtual void visit(ir_function_signature *);
+   virtual void visit(ir_function *);
+   virtual void visit(ir_expression *);
+   virtual void visit(ir_swizzle *);
+   virtual void visit(ir_dereference_variable  *);
+   virtual void visit(ir_dereference_array *);
+   virtual void visit(ir_dereference_record *);
+   virtual void visit(ir_assignment *);
+   virtual void visit(ir_constant *);
+   virtual void visit(ir_call *);
+   virtual void visit(ir_return *);
+   virtual void visit(ir_discard *);
+   virtual void visit(ir_texture *);
+   virtual void visit(ir_if *);
+   /*@}*/
+
+   st_src_reg result;
+
+   /** List of variable_storage */
+   exec_list variables;
+
+   /** List of function_entry */
+   exec_list function_signatures;
+   int next_signature_id;
+
+   /** List of glsl_to_tgsi_instruction */
+   exec_list instructions;
+
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op);
+
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op,
+        		        st_dst_reg dst, st_src_reg src0);
+
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op,
+        		        st_dst_reg dst, st_src_reg src0, st_src_reg src1);
+
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op,
+        		        st_dst_reg dst,
+        		        st_src_reg src0, st_src_reg src1, st_src_reg src2);
+
+   /**
+    * Emit the correct dot-product instruction for the type of arguments
+    */
+   void emit_dp(ir_instruction *ir,
+                st_dst_reg dst,
+                st_src_reg src0,
+                st_src_reg src1,
+                unsigned elements);
+
+   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
+        	    st_dst_reg dst, st_src_reg src0);
+
+   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
+        	    st_dst_reg dst, st_src_reg src0, st_src_reg src1);
+
+   void emit_scs(ir_instruction *ir, enum prog_opcode op,
+        	 st_dst_reg dst, const st_src_reg &src);
+
+   GLboolean try_emit_mad(ir_expression *ir,
+        		  int mul_operand);
+   GLboolean try_emit_sat(ir_expression *ir);
+
+   void emit_swz(ir_expression *ir);
+
+   bool process_move_condition(ir_rvalue *ir);
+
+   void rename_temp_register(int index, int new_index);
+   int get_first_temp_read(int index);
+   int get_first_temp_write(int index);
+   int get_last_temp_read(int index);
+   int get_last_temp_write(int index);
+
+   void copy_propagate(void);
+   void eliminate_dead_code(void);
+   void merge_registers(void);
+   void renumber_registers(void);
+
+   void *mem_ctx;
+};
+
+static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, NULL);
+
+static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP);
+
+static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X);
+
+static void
+fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
+
+static void
+fail_link(struct gl_shader_program *prog, const char *fmt, ...)
+{
+   va_list args;
+   va_start(args, fmt);
+   ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
+   va_end(args);
+
+   prog->LinkStatus = GL_FALSE;
+}
+
+static int
+swizzle_for_size(int size)
+{
+   int size_swizzles[4] = {
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
+   };
+
+   assert((size >= 1) && (size <= 4));
+   return size_swizzles[size - 1];
+}
+
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
+        		 st_dst_reg dst,
+        		 st_src_reg src0, st_src_reg src1, st_src_reg src2)
+{
+   glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
+   int num_reladdr = 0, i;
+
+   /* If we have to do relative addressing, we want to load the ARL
+    * reg directly for one of the regs, and preload the other reladdr
+    * sources into temps.
+    */
+   num_reladdr += dst.reladdr != NULL;
+   num_reladdr += src0.reladdr != NULL;
+   num_reladdr += src1.reladdr != NULL;
+   num_reladdr += src2.reladdr != NULL;
+
+   reladdr_to_temp(ir, &src2, &num_reladdr);
+   reladdr_to_temp(ir, &src1, &num_reladdr);
+   reladdr_to_temp(ir, &src0, &num_reladdr);
+
+   if (dst.reladdr) {
+      emit(ir, OPCODE_ARL, address_reg, *dst.reladdr);
+      num_reladdr--;
+   }
+   assert(num_reladdr == 0);
+
+   inst->op = op;
+   inst->dst = dst;
+   inst->src[0] = src0;
+   inst->src[1] = src1;
+   inst->src[2] = src2;
+   inst->ir = ir;
+
+   inst->function = NULL;
+   
+   if (op == OPCODE_ARL)
+      this->num_address_regs = 1;
+   
+   /* Update indirect addressing status used by TGSI */
+   if (dst.reladdr) {
+      switch(dst.file) {
+      case PROGRAM_TEMPORARY:
+         this->indirect_addr_temps = true;
+         break;
+      case PROGRAM_LOCAL_PARAM:
+      case PROGRAM_ENV_PARAM:
+      case PROGRAM_STATE_VAR:
+      case PROGRAM_NAMED_PARAM:
+      case PROGRAM_CONSTANT:
+      case PROGRAM_UNIFORM:
+         this->indirect_addr_consts = true;
+         break;
+      default:
+         break;
+      }
+   }
+   else {
+      for (i=0; i<3; i++) {
+         if(inst->src[i].reladdr) {
+            switch(dst.file) {
+            case PROGRAM_TEMPORARY:
+               this->indirect_addr_temps = true;
+               break;
+            case PROGRAM_LOCAL_PARAM:
+            case PROGRAM_ENV_PARAM:
+            case PROGRAM_STATE_VAR:
+            case PROGRAM_NAMED_PARAM:
+            case PROGRAM_CONSTANT:
+            case PROGRAM_UNIFORM:
+               this->indirect_addr_consts = true;
+               break;
+            default:
+               break;
+            }
+         }
+      }
+   }
+
+   this->instructions.push_tail(inst);
+   
+   return inst;
+}
+
+
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
+        		 st_dst_reg dst, st_src_reg src0, st_src_reg src1)
+{
+   return emit(ir, op, dst, src0, src1, undef_src);
+}
+
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
+        		 st_dst_reg dst, st_src_reg src0)
+{
+   assert(dst.writemask != 0);
+   return emit(ir, op, dst, src0, undef_src, undef_src);
+}
+
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op)
+{
+   return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
+}
+
+void
+glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
+        		    st_dst_reg dst, st_src_reg src0, st_src_reg src1,
+        		    unsigned elements)
+{
+   static const gl_inst_opcode dot_opcodes[] = {
+      OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
+   };
+
+   emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
+}
+
+/**
+ * Emits Mesa scalar opcodes to produce unique answers across channels.
+ *
+ * Some Mesa opcodes are scalar-only, like ARB_fp/vp.  The src X
+ * channel determines the result across all channels.  So to do a vec4
+ * of this operation, we want to emit a scalar per source channel used
+ * to produce dest channels.
+ */
+void
+glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
+        		        st_dst_reg dst,
+        			st_src_reg orig_src0, st_src_reg orig_src1)
+{
+   int i, j;
+   int done_mask = ~dst.writemask;
+
+   /* Mesa RCP is a scalar operation splatting results to all channels,
+    * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
+    * dst channels.
+    */
+   for (i = 0; i < 4; i++) {
+      GLuint this_mask = (1 << i);
+      glsl_to_tgsi_instruction *inst;
+      st_src_reg src0 = orig_src0;
+      st_src_reg src1 = orig_src1;
+
+      if (done_mask & this_mask)
+         continue;
+
+      GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
+      GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
+      for (j = i + 1; j < 4; j++) {
+         /* If there is another enabled component in the destination that is
+          * derived from the same inputs, generate its value on this pass as
+          * well.
+          */
+         if (!(done_mask & (1 << j)) &&
+             GET_SWZ(src0.swizzle, j) == src0_swiz &&
+             GET_SWZ(src1.swizzle, j) == src1_swiz) {
+            this_mask |= (1 << j);
+         }
+      }
+      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
+        			   src0_swiz, src0_swiz);
+      src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
+        			  src1_swiz, src1_swiz);
+
+      inst = emit(ir, op, dst, src0, src1);
+      inst->dst.writemask = this_mask;
+      done_mask |= this_mask;
+   }
+}
+
+void
+glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
+        		        st_dst_reg dst, st_src_reg src0)
+{
+   st_src_reg undef = undef_src;
+
+   undef.swizzle = SWIZZLE_XXXX;
+
+   emit_scalar(ir, op, dst, src0, undef);
+}
+
+/**
+ * Emit an OPCODE_SCS instruction
+ *
+ * The \c SCS opcode functions a bit differently than the other Mesa (or
+ * ARB_fragment_program) opcodes.  Instead of splatting its result across all
+ * four components of the destination, it writes one value to the \c x
+ * component and another value to the \c y component.
+ *
+ * \param ir        IR instruction being processed
+ * \param op        Either \c OPCODE_SIN or \c OPCODE_COS depending on which
+ *                  value is desired.
+ * \param dst       Destination register
+ * \param src       Source register
+ */
+void
+glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
+        		     st_dst_reg dst,
+        		     const st_src_reg &src)
+{
+   /* Vertex programs cannot use the SCS opcode.
+    */
+   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
+      emit_scalar(ir, op, dst, src);
+      return;
+   }
+
+   const unsigned component = (op == OPCODE_SIN) ? 0 : 1;
+   const unsigned scs_mask = (1U << component);
+   int done_mask = ~dst.writemask;
+   st_src_reg tmp;
+
+   assert(op == OPCODE_SIN || op == OPCODE_COS);
+
+   /* If there are compnents in the destination that differ from the component
+    * that will be written by the SCS instrution, we'll need a temporary.
+    */
+   if (scs_mask != unsigned(dst.writemask)) {
+      tmp = get_temp(glsl_type::vec4_type);
+   }
+
+   for (unsigned i = 0; i < 4; i++) {
+      unsigned this_mask = (1U << i);
+      st_src_reg src0 = src;
+
+      if ((done_mask & this_mask) != 0)
+         continue;
+
+      /* The source swizzle specified which component of the source generates
+       * sine / cosine for the current component in the destination.  The SCS
+       * instruction requires that this value be swizzle to the X component.
+       * Replace the current swizzle with a swizzle that puts the source in
+       * the X component.
+       */
+      unsigned src0_swiz = GET_SWZ(src.swizzle, i);
+
+      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
+        			   src0_swiz, src0_swiz);
+      for (unsigned j = i + 1; j < 4; j++) {
+         /* If there is another enabled component in the destination that is
+          * derived from the same inputs, generate its value on this pass as
+          * well.
+          */
+         if (!(done_mask & (1 << j)) &&
+             GET_SWZ(src0.swizzle, j) == src0_swiz) {
+            this_mask |= (1 << j);
+         }
+      }
+
+      if (this_mask != scs_mask) {
+         glsl_to_tgsi_instruction *inst;
+         st_dst_reg tmp_dst = st_dst_reg(tmp);
+
+         /* Emit the SCS instruction.
+          */
+         inst = emit(ir, OPCODE_SCS, tmp_dst, src0);
+         inst->dst.writemask = scs_mask;
+
+         /* Move the result of the SCS instruction to the desired location in
+          * the destination.
+          */
+         tmp.swizzle = MAKE_SWIZZLE4(component, component,
+        			     component, component);
+         inst = emit(ir, OPCODE_SCS, dst, tmp);
+         inst->dst.writemask = this_mask;
+      } else {
+         /* Emit the SCS instruction to write directly to the destination.
+          */
+         glsl_to_tgsi_instruction *inst = emit(ir, OPCODE_SCS, dst, src0);
+         inst->dst.writemask = scs_mask;
+      }
+
+      done_mask |= this_mask;
+   }
+}
+
+struct st_src_reg
+glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
+{
+   st_src_reg src(PROGRAM_CONSTANT, -1, NULL);
+
+   src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
+        				  &val, 1, &src.swizzle);
+
+   return src;
+}
+
+static int
+type_size(const struct glsl_type *type)
+{
+   unsigned int i;
+   int size;
+
+   switch (type->base_type) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_BOOL:
+      if (type->is_matrix()) {
+         return type->matrix_columns;
+      } else {
+         /* Regardless of size of vector, it gets a vec4. This is bad
+          * packing for things like floats, but otherwise arrays become a
+          * mess.  Hopefully a later pass over the code can pack scalars
+          * down if appropriate.
+          */
+         return 1;
+      }
+   case GLSL_TYPE_ARRAY:
+      assert(type->length > 0);
+      return type_size(type->fields.array) * type->length;
+   case GLSL_TYPE_STRUCT:
+      size = 0;
+      for (i = 0; i < type->length; i++) {
+         size += type_size(type->fields.structure[i].type);
+      }
+      return size;
+   case GLSL_TYPE_SAMPLER:
+      /* Samplers take up one slot in UNIFORMS[], but they're baked in
+       * at link time.
+       */
+      return 1;
+   default:
+      assert(0);
+      return 0;
+   }
+}
+
+/**
+ * In the initial pass of codegen, we assign temporary numbers to
+ * intermediate results.  (not SSA -- variable assignments will reuse
+ * storage).  Actual register allocation for the Mesa VM occurs in a
+ * pass over the Mesa IR later.
+ */
+st_src_reg
+glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
+{
+   st_src_reg src;
+   int swizzle[4];
+   int i;
+
+   src.file = PROGRAM_TEMPORARY;
+   src.index = next_temp;
+   src.reladdr = NULL;
+   next_temp += type_size(type);
+
+   if (type->is_array() || type->is_record()) {
+      src.swizzle = SWIZZLE_NOOP;
+   } else {
+      for (i = 0; i < type->vector_elements; i++)
+         swizzle[i] = i;
+      for (; i < 4; i++)
+         swizzle[i] = type->vector_elements - 1;
+      src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
+        			  swizzle[2], swizzle[3]);
+   }
+   src.negate = 0;
+
+   return src;
+}
+
+variable_storage *
+glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
+{
+   
+   variable_storage *entry;
+
+   foreach_iter(exec_list_iterator, iter, this->variables) {
+      entry = (variable_storage *)iter.get();
+
+      if (entry->var == var)
+         return entry;
+   }
+
+   return NULL;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_variable *ir)
+{
+   if (strcmp(ir->name, "gl_FragCoord") == 0) {
+      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
+
+      fp->OriginUpperLeft = ir->origin_upper_left;
+      fp->PixelCenterInteger = ir->pixel_center_integer;
+
+   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
+      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
+      switch (ir->depth_layout) {
+      case ir_depth_layout_none:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE;
+         break;
+      case ir_depth_layout_any:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY;
+         break;
+      case ir_depth_layout_greater:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER;
+         break;
+      case ir_depth_layout_less:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS;
+         break;
+      case ir_depth_layout_unchanged:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED;
+         break;
+      default:
+         assert(0);
+         break;
+      }
+   }
+
+   if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
+      unsigned int i;
+      const ir_state_slot *const slots = ir->state_slots;
+      assert(ir->state_slots != NULL);
+
+      /* Check if this statevar's setup in the STATE file exactly
+       * matches how we'll want to reference it as a
+       * struct/array/whatever.  If not, then we need to move it into
+       * temporary storage and hope that it'll get copy-propagated
+       * out.
+       */
+      for (i = 0; i < ir->num_state_slots; i++) {
+         if (slots[i].swizzle != SWIZZLE_XYZW) {
+            break;
+         }
+      }
+
+      struct variable_storage *storage;
+      st_dst_reg dst;
+      if (i == ir->num_state_slots) {
+         /* We'll set the index later. */
+         storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
+         this->variables.push_tail(storage);
+
+         dst = undef_dst;
+      } else {
+         /* The variable_storage constructor allocates slots based on the size
+          * of the type.  However, this had better match the number of state
+          * elements that we're going to copy into the new temporary.
+          */
+         assert((int) ir->num_state_slots == type_size(ir->type));
+
+         storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
+        					 this->next_temp);
+         this->variables.push_tail(storage);
+         this->next_temp += type_size(ir->type);
+
+         dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, NULL));
+      }
+
+
+      for (unsigned int i = 0; i < ir->num_state_slots; i++) {
+         int index = _mesa_add_state_reference(this->prog->Parameters,
+        				       (gl_state_index *)slots[i].tokens);
+
+         if (storage->file == PROGRAM_STATE_VAR) {
+            if (storage->index == -1) {
+               storage->index = index;
+            } else {
+               assert(index == storage->index + (int)i);
+            }
+         } else {
+            st_src_reg src(PROGRAM_STATE_VAR, index, NULL);
+            src.swizzle = slots[i].swizzle;
+            emit(ir, OPCODE_MOV, dst, src);
+            /* even a float takes up a whole vec4 reg in a struct/array. */
+            dst.index++;
+         }
+      }
+
+      if (storage->file == PROGRAM_TEMPORARY &&
+          dst.index != storage->index + (int) ir->num_state_slots) {
+         fail_link(this->shader_program,
+        	   "failed to load builtin uniform `%s'  (%d/%d regs loaded)\n",
+        	   ir->name, dst.index - storage->index,
+        	   type_size(ir->type));
+      }
+   }
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_loop *ir)
+{
+   ir_dereference_variable *counter = NULL;
+
+   if (ir->counter != NULL)
+      counter = new(ir) ir_dereference_variable(ir->counter);
+
+   if (ir->from != NULL) {
+      assert(ir->counter != NULL);
+
+      ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
+
+      a->accept(this);
+      delete a;
+   }
+
+   emit(NULL, OPCODE_BGNLOOP);
+
+   if (ir->to) {
+      ir_expression *e =
+         new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
+        		       counter, ir->to);
+      ir_if *if_stmt =  new(ir) ir_if(e);
+
+      ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
+
+      if_stmt->then_instructions.push_tail(brk);
+
+      if_stmt->accept(this);
+
+      delete if_stmt;
+      delete e;
+      delete brk;
+   }
+
+   visit_exec_list(&ir->body_instructions, this);
+
+   if (ir->increment) {
+      ir_expression *e =
+         new(ir) ir_expression(ir_binop_add, counter->type,
+        		       counter, ir->increment);
+
+      ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
+
+      a->accept(this);
+      delete a;
+      delete e;
+   }
+
+   emit(NULL, OPCODE_ENDLOOP);
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
+{
+   switch (ir->mode) {
+   case ir_loop_jump::jump_break:
+      emit(NULL, OPCODE_BRK);
+      break;
+   case ir_loop_jump::jump_continue:
+      emit(NULL, OPCODE_CONT);
+      break;
+   }
+}
+
+
+void
+glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
+{
+   assert(0);
+   (void)ir;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_function *ir)
+{
+   /* Ignore function bodies other than main() -- we shouldn't see calls to
+    * them since they should all be inlined before we get to glsl_to_tgsi.
+    */
+   if (strcmp(ir->name, "main") == 0) {
+      const ir_function_signature *sig;
+      exec_list empty;
+
+      sig = ir->matching_signature(&empty);
+
+      assert(sig);
+
+      foreach_iter(exec_list_iterator, iter, sig->body) {
+         ir_instruction *ir = (ir_instruction *)iter.get();
+
+         ir->accept(this);
+      }
+   }
+}
+
+GLboolean
+glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
+{
+   int nonmul_operand = 1 - mul_operand;
+   st_src_reg a, b, c;
+
+   ir_expression *expr = ir->operands[mul_operand]->as_expression();
+   if (!expr || expr->operation != ir_binop_mul)
+      return false;
+
+   expr->operands[0]->accept(this);
+   a = this->result;
+   expr->operands[1]->accept(this);
+   b = this->result;
+   ir->operands[nonmul_operand]->accept(this);
+   c = this->result;
+
+   this->result = get_temp(ir->type);
+   emit(ir, OPCODE_MAD, st_dst_reg(this->result), a, b, c);
+
+   return true;
+}
+
+GLboolean
+glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
+{
+   /* Saturates were only introduced to vertex programs in
+    * NV_vertex_program3, so don't give them to drivers in the VP.
+    */
+   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
+      return false;
+
+   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
+   if (!sat_src)
+      return false;
+
+   sat_src->accept(this);
+   st_src_reg src = this->result;
+
+   this->result = get_temp(ir->type);
+   glsl_to_tgsi_instruction *inst;
+   inst = emit(ir, OPCODE_MOV, st_dst_reg(this->result), src);
+   inst->saturate = true;
+
+   return true;
+}
+
+void
+glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
+        			    st_src_reg *reg, int *num_reladdr)
+{
+   if (!reg->reladdr)
+      return;
+
+   emit(ir, OPCODE_ARL, address_reg, *reg->reladdr);
+
+   if (*num_reladdr != 1) {
+      st_src_reg temp = get_temp(glsl_type::vec4_type);
+
+      emit(ir, OPCODE_MOV, st_dst_reg(temp), *reg);
+      *reg = temp;
+   }
+
+   (*num_reladdr)--;
+}
+
+void
+glsl_to_tgsi_visitor::emit_swz(ir_expression *ir)
+{
+   /* Assume that the vector operator is in a form compatible with OPCODE_SWZ.
+    * This means that each of the operands is either an immediate value of -1,
+    * 0, or 1, or is a component from one source register (possibly with
+    * negation).
+    */
+   uint8_t components[4] = { 0 };
+   bool negate[4] = { false };
+   ir_variable *var = NULL;
+
+   for (unsigned i = 0; i < ir->type->vector_elements; i++) {
+      ir_rvalue *op = ir->operands[i];
+
+      assert(op->type->is_scalar());
+
+      while (op != NULL) {
+         switch (op->ir_type) {
+         case ir_type_constant: {
+
+            assert(op->type->is_scalar());
+
+            const ir_constant *const c = op->as_constant();
+            if (c->is_one()) {
+               components[i] = SWIZZLE_ONE;
+            } else if (c->is_zero()) {
+               components[i] = SWIZZLE_ZERO;
+            } else if (c->is_negative_one()) {
+               components[i] = SWIZZLE_ONE;
+               negate[i] = true;
+            } else {
+               assert(!"SWZ constant must be 0.0 or 1.0.");
+            }
+
+            op = NULL;
+            break;
+         }
+
+         case ir_type_dereference_variable: {
+            ir_dereference_variable *const deref =
+               (ir_dereference_variable *) op;
+
+            assert((var == NULL) || (deref->var == var));
+            components[i] = SWIZZLE_X;
+            var = deref->var;
+            op = NULL;
+            break;
+         }
+
+         case ir_type_expression: {
+            ir_expression *const expr = (ir_expression *) op;
+
+            assert(expr->operation == ir_unop_neg);
+            negate[i] = true;
+
+            op = expr->operands[0];
+            break;
+         }
+
+         case ir_type_swizzle: {
+            ir_swizzle *const swiz = (ir_swizzle *) op;
+
+            components[i] = swiz->mask.x;
+            op = swiz->val;
+            break;
+         }
+
+         default:
+            assert(!"Should not get here.");
+            return;
+         }
+      }
+   }
+
+   assert(var != NULL);
+
+   ir_dereference_variable *const deref =
+      new(mem_ctx) ir_dereference_variable(var);
+
+   this->result.file = PROGRAM_UNDEFINED;
+   deref->accept(this);
+   if (this->result.file == PROGRAM_UNDEFINED) {
+      ir_print_visitor v;
+      printf("Failed to get tree for expression operand:\n");
+      deref->accept(&v);
+      exit(1);
+   }
+
+   st_src_reg src;
+
+   src = this->result;
+   src.swizzle = MAKE_SWIZZLE4(components[0],
+        		       components[1],
+        		       components[2],
+        		       components[3]);
+   src.negate = ((unsigned(negate[0]) << 0)
+        	 | (unsigned(negate[1]) << 1)
+        	 | (unsigned(negate[2]) << 2)
+        	 | (unsigned(negate[3]) << 3));
+
+   /* Storage for our result.  Ideally for an assignment we'd be using the
+    * actual storage for the result here, instead.
+    */
+   const st_src_reg result_src = get_temp(ir->type);
+   st_dst_reg result_dst = st_dst_reg(result_src);
+
+   /* Limit writes to the channels that will be used by result_src later.
+    * This does limit this temp's use as a temporary for multi-instruction
+    * sequences.
+    */
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+
+   emit(ir, OPCODE_SWZ, result_dst, src);
+   this->result = result_src;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_expression *ir)
+{
+   unsigned int operand;
+   st_src_reg op[Elements(ir->operands)];
+   st_src_reg result_src;
+   st_dst_reg result_dst;
+
+   /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
+    */
+   if (ir->operation == ir_binop_add) {
+      if (try_emit_mad(ir, 1))
+         return;
+      if (try_emit_mad(ir, 0))
+         return;
+   }
+   if (try_emit_sat(ir))
+      return;
+
+   if (ir->operation == ir_quadop_vector) {
+      this->emit_swz(ir);
+      return;
+   }
+
+   for (operand = 0; operand < ir->get_num_operands(); operand++) {
+      this->result.file = PROGRAM_UNDEFINED;
+      ir->operands[operand]->accept(this);
+      if (this->result.file == PROGRAM_UNDEFINED) {
+         ir_print_visitor v;
+         printf("Failed to get tree for expression operand:\n");
+         ir->operands[operand]->accept(&v);
+         exit(1);
+      }
+      op[operand] = this->result;
+
+      /* Matrix expression operands should have been broken down to vector
+       * operations already.
+       */
+      assert(!ir->operands[operand]->type->is_matrix());
+   }
+
+   int vector_elements = ir->operands[0]->type->vector_elements;
+   if (ir->operands[1]) {
+      vector_elements = MAX2(vector_elements,
+        		     ir->operands[1]->type->vector_elements);
+   }
+
+   this->result.file = PROGRAM_UNDEFINED;
+
+   /* Storage for our result.  Ideally for an assignment we'd be using
+    * the actual storage for the result here, instead.
+    */
+   result_src = get_temp(ir->type);
+   /* convenience for the emit functions below. */
+   result_dst = st_dst_reg(result_src);
+   /* Limit writes to the channels that will be used by result_src later.
+    * This does limit this temp's use as a temporary for multi-instruction
+    * sequences.
+    */
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+
+   switch (ir->operation) {
+   case ir_unop_logic_not:
+      emit(ir, OPCODE_SEQ, result_dst, op[0], st_src_reg_for_float(0.0));
+      break;
+   case ir_unop_neg:
+      op[0].negate = ~op[0].negate;
+      result_src = op[0];
+      break;
+   case ir_unop_abs:
+      emit(ir, OPCODE_ABS, result_dst, op[0]);
+      break;
+   case ir_unop_sign:
+      emit(ir, OPCODE_SSG, result_dst, op[0]);
+      break;
+   case ir_unop_rcp:
+      emit_scalar(ir, OPCODE_RCP, result_dst, op[0]);
+      break;
+
+   case ir_unop_exp2:
+      emit_scalar(ir, OPCODE_EX2, result_dst, op[0]);
+      break;
+   case ir_unop_exp:
+   case ir_unop_log:
+      assert(!"not reached: should be handled by ir_explog_to_explog2");
+      break;
+   case ir_unop_log2:
+      emit_scalar(ir, OPCODE_LG2, result_dst, op[0]);
+      break;
+   case ir_unop_sin:
+      emit_scalar(ir, OPCODE_SIN, result_dst, op[0]);
+      break;
+   case ir_unop_cos:
+      emit_scalar(ir, OPCODE_COS, result_dst, op[0]);
+      break;
+   case ir_unop_sin_reduced:
+      emit_scs(ir, OPCODE_SIN, result_dst, op[0]);
+      break;
+   case ir_unop_cos_reduced:
+      emit_scs(ir, OPCODE_COS, result_dst, op[0]);
+      break;
+
+   case ir_unop_dFdx:
+      emit(ir, OPCODE_DDX, result_dst, op[0]);
+      break;
+   case ir_unop_dFdy:
+      emit(ir, OPCODE_DDY, result_dst, op[0]);
+      break;
+
+   case ir_unop_noise: {
+      const enum prog_opcode opcode =
+         prog_opcode(OPCODE_NOISE1
+        	     + (ir->operands[0]->type->vector_elements) - 1);
+      assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4));
+
+      emit(ir, opcode, result_dst, op[0]);
+      break;
+   }
+
+   case ir_binop_add:
+      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_sub:
+      emit(ir, OPCODE_SUB, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_mul:
+      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_div:
+      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
+   case ir_binop_mod:
+      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
+      break;
+
+   case ir_binop_less:
+      emit(ir, OPCODE_SLT, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_greater:
+      emit(ir, OPCODE_SGT, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_lequal:
+      emit(ir, OPCODE_SLE, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_gequal:
+      emit(ir, OPCODE_SGE, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_equal:
+      emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_nequal:
+      emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_all_equal:
+      /* "==" operator producing a scalar boolean. */
+      if (ir->operands[0]->type->is_vector() ||
+          ir->operands[1]->type->is_vector()) {
+         st_src_reg temp = get_temp(glsl_type::vec4_type);
+         emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+         emit_dp(ir, result_dst, temp, temp, vector_elements);
+         emit(ir, OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0));
+      } else {
+         emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
+      }
+      break;
+   case ir_binop_any_nequal:
+      /* "!=" operator producing a scalar boolean. */
+      if (ir->operands[0]->type->is_vector() ||
+          ir->operands[1]->type->is_vector()) {
+         st_src_reg temp = get_temp(glsl_type::vec4_type);
+         emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+         emit_dp(ir, result_dst, temp, temp, vector_elements);
+         emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+      } else {
+         emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
+      }
+      break;
+
+   case ir_unop_any:
+      assert(ir->operands[0]->type->is_vector());
+      emit_dp(ir, result_dst, op[0], op[0],
+              ir->operands[0]->type->vector_elements);
+      emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+      break;
+
+   case ir_binop_logic_xor:
+      emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_logic_or:
+      /* This could be a saturated add and skip the SNE. */
+      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
+      emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+      break;
+
+   case ir_binop_logic_and:
+      /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
+      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_dot:
+      assert(ir->operands[0]->type->is_vector());
+      assert(ir->operands[0]->type == ir->operands[1]->type);
+      emit_dp(ir, result_dst, op[0], op[1],
+              ir->operands[0]->type->vector_elements);
+      break;
+
+   case ir_unop_sqrt:
+      /* sqrt(x) = x * rsq(x). */
+      emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
+      emit(ir, OPCODE_MUL, result_dst, result_src, op[0]);
+      /* For incoming channels <= 0, set the result to 0. */
+      op[0].negate = ~op[0].negate;
+      emit(ir, OPCODE_CMP, result_dst,
+        		  op[0], result_src, st_src_reg_for_float(0.0));
+      break;
+   case ir_unop_rsq:
+      emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
+      break;
+   case ir_unop_i2f:
+   case ir_unop_b2f:
+   case ir_unop_b2i:
+      /* Mesa IR lacks types, ints are stored as truncated floats. */
+      result_src = op[0];
+      break;
+   case ir_unop_f2i:
+      emit(ir, OPCODE_TRUNC, result_dst, op[0]);
+      break;
+   case ir_unop_f2b:
+   case ir_unop_i2b:
+      emit(ir, OPCODE_SNE, result_dst,
+        		  op[0], st_src_reg_for_float(0.0));
+      break;
+   case ir_unop_trunc:
+      emit(ir, OPCODE_TRUNC, result_dst, op[0]);
+      break;
+   case ir_unop_ceil:
+      op[0].negate = ~op[0].negate;
+      emit(ir, OPCODE_FLR, result_dst, op[0]);
+      result_src.negate = ~result_src.negate;
+      break;
+   case ir_unop_floor:
+      emit(ir, OPCODE_FLR, result_dst, op[0]);
+      break;
+   case ir_unop_fract:
+      emit(ir, OPCODE_FRC, result_dst, op[0]);
+      break;
+
+   case ir_binop_min:
+      emit(ir, OPCODE_MIN, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_max:
+      emit(ir, OPCODE_MAX, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_pow:
+      emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]);
+      break;
+
+   case ir_unop_bit_not:
+   case ir_unop_u2f:
+   case ir_binop_lshift:
+   case ir_binop_rshift:
+   case ir_binop_bit_and:
+   case ir_binop_bit_xor:
+   case ir_binop_bit_or:
+   case ir_unop_round_even:
+      assert(!"GLSL 1.30 features unsupported");
+      break;
+
+   case ir_quadop_vector:
+      /* This operation should have already been handled.
+       */
+      assert(!"Should not get here.");
+      break;
+   }
+
+   this->result = result_src;
+}
+
+
+void
+glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
+{
+   st_src_reg src;
+   int i;
+   int swizzle[4];
+
+   /* Note that this is only swizzles in expressions, not those on the left
+    * hand side of an assignment, which do write masking.  See ir_assignment
+    * for that.
+    */
+
+   ir->val->accept(this);
+   src = this->result;
+   assert(src.file != PROGRAM_UNDEFINED);
+
+   for (i = 0; i < 4; i++) {
+      if (i < ir->type->vector_elements) {
+         switch (i) {
+         case 0:
+            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
+            break;
+         case 1:
+            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
+            break;
+         case 2:
+            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
+            break;
+         case 3:
+            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
+            break;
+         }
+      } else {
+         /* If the type is smaller than a vec4, replicate the last
+          * channel out.
+          */
+         swizzle[i] = swizzle[ir->type->vector_elements - 1];
+      }
+   }
+
+   src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+
+   this->result = src;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
+{
+   variable_storage *entry = find_variable_storage(ir->var);
+   ir_variable *var = ir->var;
+
+   if (!entry) {
+      switch (var->mode) {
+      case ir_var_uniform:
+         entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
+        				       var->location);
+         this->variables.push_tail(entry);
+         break;
+      case ir_var_in:
+      case ir_var_inout:
+         /* The linker assigns locations for varyings and attributes,
+          * including deprecated builtins (like gl_Color), user-assign
+          * generic attributes (glBindVertexLocation), and
+          * user-defined varyings.
+          *
+          * FINISHME: We would hit this path for function arguments.  Fix!
+          */
+         assert(var->location != -1);
+         entry = new(mem_ctx) variable_storage(var,
+                                               PROGRAM_INPUT,
+                                               var->location);
+         if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
+             var->location >= VERT_ATTRIB_GENERIC0) {
+            _mesa_add_attribute(this->prog->Attributes,
+                                var->name,
+                                _mesa_sizeof_glsl_type(var->type->gl_type),
+                                var->type->gl_type,
+                                var->location - VERT_ATTRIB_GENERIC0);
+         }
+         break;
+      case ir_var_out:
+         assert(var->location != -1);
+         entry = new(mem_ctx) variable_storage(var,
+                                               PROGRAM_OUTPUT,
+                                               var->location);
+         break;
+      case ir_var_system_value:
+         entry = new(mem_ctx) variable_storage(var,
+                                               PROGRAM_SYSTEM_VALUE,
+                                               var->location);
+         break;
+      case ir_var_auto:
+      case ir_var_temporary:
+         entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
+        				       this->next_temp);
+         this->variables.push_tail(entry);
+
+         next_temp += type_size(var->type);
+         break;
+      }
+
+      if (!entry) {
+         printf("Failed to make storage for %s\n", var->name);
+         exit(1);
+      }
+   }
+
+   this->result = st_src_reg(entry->file, entry->index, var->type);
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
+{
+   ir_constant *index;
+   st_src_reg src;
+   int element_size = type_size(ir->type);
+
+   index = ir->array_index->constant_expression_value();
+
+   ir->array->accept(this);
+   src = this->result;
+
+   if (index) {
+      src.index += index->value.i[0] * element_size;
+   } else {
+      st_src_reg array_base = this->result;
+      /* Variable index array dereference.  It eats the "vec4" of the
+       * base of the array and an index that offsets the Mesa register
+       * index.
+       */
+      ir->array_index->accept(this);
+
+      st_src_reg index_reg;
+
+      if (element_size == 1) {
+         index_reg = this->result;
+      } else {
+         index_reg = get_temp(glsl_type::float_type);
+
+         emit(ir, OPCODE_MUL, st_dst_reg(index_reg),
+              this->result, st_src_reg_for_float(element_size));
+      }
+
+      src.reladdr = ralloc(mem_ctx, st_src_reg);
+      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
+   }
+
+   /* If the type is smaller than a vec4, replicate the last channel out. */
+   if (ir->type->is_scalar() || ir->type->is_vector())
+      src.swizzle = swizzle_for_size(ir->type->vector_elements);
+   else
+      src.swizzle = SWIZZLE_NOOP;
+
+   this->result = src;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
+{
+   unsigned int i;
+   const glsl_type *struct_type = ir->record->type;
+   int offset = 0;
+
+   ir->record->accept(this);
+
+   for (i = 0; i < struct_type->length; i++) {
+      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
+         break;
+      offset += type_size(struct_type->fields.structure[i].type);
+   }
+
+   /* If the type is smaller than a vec4, replicate the last channel out. */
+   if (ir->type->is_scalar() || ir->type->is_vector())
+      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
+   else
+      this->result.swizzle = SWIZZLE_NOOP;
+
+   this->result.index += offset;
+}
+
+/**
+ * We want to be careful in assignment setup to hit the actual storage
+ * instead of potentially using a temporary like we might with the
+ * ir_dereference handler.
+ */
+static st_dst_reg
+get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v)
+{
+   /* The LHS must be a dereference.  If the LHS is a variable indexed array
+    * access of a vector, it must be separated into a series conditional moves
+    * before reaching this point (see ir_vec_index_to_cond_assign).
+    */
+   assert(ir->as_dereference());
+   ir_dereference_array *deref_array = ir->as_dereference_array();
+   if (deref_array) {
+      assert(!deref_array->array->type->is_vector());
+   }
+
+   /* Use the rvalue deref handler for the most part.  We'll ignore
+    * swizzles in it and write swizzles using writemask, though.
+    */
+   ir->accept(v);
+   return st_dst_reg(v->result);
+}
+
+/**
+ * Process the condition of a conditional assignment
+ *
+ * Examines the condition of a conditional assignment to generate the optimal
+ * first operand of a \c CMP instruction.  If the condition is a relational
+ * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
+ * used as the source for the \c CMP instruction.  Otherwise the comparison
+ * is processed to a boolean result, and the boolean result is used as the
+ * operand to the CMP instruction.
+ */
+bool
+glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
+{
+   ir_rvalue *src_ir = ir;
+   bool negate = true;
+   bool switch_order = false;
+
+   ir_expression *const expr = ir->as_expression();
+   if ((expr != NULL) && (expr->get_num_operands() == 2)) {
+      bool zero_on_left = false;
+
+      if (expr->operands[0]->is_zero()) {
+         src_ir = expr->operands[1];
+         zero_on_left = true;
+      } else if (expr->operands[1]->is_zero()) {
+         src_ir = expr->operands[0];
+         zero_on_left = false;
+      }
+
+      /*      a is -  0  +            -  0  +
+       * (a <  0)  T  F  F  ( a < 0)  T  F  F
+       * (0 <  a)  F  F  T  (-a < 0)  F  F  T
+       * (a <= 0)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
+       * (0 <= a)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
+       * (a >  0)  F  F  T  (-a < 0)  F  F  T
+       * (0 >  a)  T  F  F  ( a < 0)  T  F  F
+       * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
+       * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
+       *
+       * Note that exchanging the order of 0 and 'a' in the comparison simply
+       * means that the value of 'a' should be negated.
+       */
+      if (src_ir != ir) {
+         switch (expr->operation) {
+         case ir_binop_less:
+            switch_order = false;
+            negate = zero_on_left;
+            break;
+
+         case ir_binop_greater:
+            switch_order = false;
+            negate = !zero_on_left;
+            break;
+
+         case ir_binop_lequal:
+            switch_order = true;
+            negate = !zero_on_left;
+            break;
+
+         case ir_binop_gequal:
+            switch_order = true;
+            negate = zero_on_left;
+            break;
+
+         default:
+            /* This isn't the right kind of comparison afterall, so make sure
+             * the whole condition is visited.
+             */
+            src_ir = ir;
+            break;
+         }
+      }
+   }
+
+   src_ir->accept(this);
+
+   /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
+    * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
+    * choose which value OPCODE_CMP produces without an extra instruction
+    * computing the condition.
+    */
+   if (negate)
+      this->result.negate = ~this->result.negate;
+
+   return switch_order;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_assignment *ir)
+{
+   st_dst_reg l;
+   st_src_reg r;
+   int i;
+
+   ir->rhs->accept(this);
+   r = this->result;
+
+   l = get_assignment_lhs(ir->lhs, this);
+
+   /* FINISHME: This should really set to the correct maximal writemask for each
+    * FINISHME: component written (in the loops below).  This case can only
+    * FINISHME: occur for matrices, arrays, and structures.
+    */
+   if (ir->write_mask == 0) {
+      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
+      l.writemask = WRITEMASK_XYZW;
+   } else if (ir->lhs->type->is_scalar()) {
+      /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
+       * FINISHME: W component of fragment shader output zero, work correctly.
+       */
+      l.writemask = WRITEMASK_XYZW;
+   } else {
+      int swizzles[4];
+      int first_enabled_chan = 0;
+      int rhs_chan = 0;
+
+      assert(ir->lhs->type->is_vector());
+      l.writemask = ir->write_mask;
+
+      for (int i = 0; i < 4; i++) {
+         if (l.writemask & (1 << i)) {
+            first_enabled_chan = GET_SWZ(r.swizzle, i);
+            break;
+         }
+      }
+
+      /* Swizzle a small RHS vector into the channels being written.
+       *
+       * glsl ir treats write_mask as dictating how many channels are
+       * present on the RHS while Mesa IR treats write_mask as just
+       * showing which channels of the vec4 RHS get written.
+       */
+      for (int i = 0; i < 4; i++) {
+         if (l.writemask & (1 << i))
+            swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
+         else
+            swizzles[i] = first_enabled_chan;
+      }
+      r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
+        			swizzles[2], swizzles[3]);
+   }
+
+   assert(l.file != PROGRAM_UNDEFINED);
+   assert(r.file != PROGRAM_UNDEFINED);
+
+   if (ir->condition) {
+      const bool switch_order = this->process_move_condition(ir->condition);
+      st_src_reg condition = this->result;
+
+      for (i = 0; i < type_size(ir->lhs->type); i++) {
+         if (switch_order) {
+            emit(ir, OPCODE_CMP, l, condition, st_src_reg(l), r);
+         } else {
+            emit(ir, OPCODE_CMP, l, condition, r, st_src_reg(l));
+         }
+
+         l.index++;
+         r.index++;
+      }
+   } else {
+      for (i = 0; i < type_size(ir->lhs->type); i++) {
+         emit(ir, OPCODE_MOV, l, r);
+         l.index++;
+         r.index++;
+      }
+   }
+}
+
+
+void
+glsl_to_tgsi_visitor::visit(ir_constant *ir)
+{
+   st_src_reg src;
+   GLfloat stack_vals[4] = { 0 };
+   GLfloat *values = stack_vals;
+   unsigned int i;
+
+   /* Unfortunately, 4 floats is all we can get into
+    * _mesa_add_unnamed_constant.  So, make a temp to store an
+    * aggregate constant and move each constant value into it.  If we
+    * get lucky, copy propagation will eliminate the extra moves.
+    */
+
+   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
+      st_src_reg temp_base = get_temp(ir->type);
+      st_dst_reg temp = st_dst_reg(temp_base);
+
+      foreach_iter(exec_list_iterator, iter, ir->components) {
+         ir_constant *field_value = (ir_constant *)iter.get();
+         int size = type_size(field_value->type);
+
+         assert(size > 0);
+
+         field_value->accept(this);
+         src = this->result;
+
+         for (i = 0; i < (unsigned int)size; i++) {
+            emit(ir, OPCODE_MOV, temp, src);
+
+            src.index++;
+            temp.index++;
+         }
+      }
+      this->result = temp_base;
+      return;
+   }
+
+   if (ir->type->is_array()) {
+      st_src_reg temp_base = get_temp(ir->type);
+      st_dst_reg temp = st_dst_reg(temp_base);
+      int size = type_size(ir->type->fields.array);
+
+      assert(size > 0);
+
+      for (i = 0; i < ir->type->length; i++) {
+         ir->array_elements[i]->accept(this);
+         src = this->result;
+         for (int j = 0; j < size; j++) {
+            emit(ir, OPCODE_MOV, temp, src);
+
+            src.index++;
+            temp.index++;
+         }
+      }
+      this->result = temp_base;
+      return;
+   }
+
+   if (ir->type->is_matrix()) {
+      st_src_reg mat = get_temp(ir->type);
+      st_dst_reg mat_column = st_dst_reg(mat);
+
+      for (i = 0; i < ir->type->matrix_columns; i++) {
+         assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+         values = &ir->value.f[i * ir->type->vector_elements];
+
+         src = st_src_reg(PROGRAM_CONSTANT, -1, NULL);
+         src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
+        					values,
+        					ir->type->vector_elements,
+        					&src.swizzle);
+         emit(ir, OPCODE_MOV, mat_column, src);
+
+         mat_column.index++;
+      }
+
+      this->result = mat;
+      return;
+   }
+
+   src.file = PROGRAM_CONSTANT;
+   switch (ir->type->base_type) {
+   case GLSL_TYPE_FLOAT:
+      values = &ir->value.f[0];
+      break;
+   case GLSL_TYPE_UINT:
+      for (i = 0; i < ir->type->vector_elements; i++) {
+         values[i] = ir->value.u[i];
+      }
+      break;
+   case GLSL_TYPE_INT:
+      for (i = 0; i < ir->type->vector_elements; i++) {
+         values[i] = ir->value.i[i];
+      }
+      break;
+   case GLSL_TYPE_BOOL:
+      for (i = 0; i < ir->type->vector_elements; i++) {
+         values[i] = ir->value.b[i];
+      }
+      break;
+   default:
+      assert(!"Non-float/uint/int/bool constant");
+   }
+
+   this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type);
+   this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
+        					   values,
+        					   ir->type->vector_elements,
+        					   &this->result.swizzle);
+}
+
+function_entry *
+glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
+{
+   function_entry *entry;
+
+   foreach_iter(exec_list_iterator, iter, this->function_signatures) {
+      entry = (function_entry *)iter.get();
+
+      if (entry->sig == sig)
+         return entry;
+   }
+
+   entry = ralloc(mem_ctx, function_entry);
+   entry->sig = sig;
+   entry->sig_id = this->next_signature_id++;
+   entry->bgn_inst = NULL;
+
+   /* Allocate storage for all the parameters. */
+   foreach_iter(exec_list_iterator, iter, sig->parameters) {
+      ir_variable *param = (ir_variable *)iter.get();
+      variable_storage *storage;
+
+      storage = find_variable_storage(param);
+      assert(!storage);
+
+      storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
+        				      this->next_temp);
+      this->variables.push_tail(storage);
+
+      this->next_temp += type_size(param->type);
+   }
+
+   if (!sig->return_type->is_void()) {
+      entry->return_reg = get_temp(sig->return_type);
+   } else {
+      entry->return_reg = undef_src;
+   }
+
+   this->function_signatures.push_tail(entry);
+   return entry;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_call *ir)
+{
+   glsl_to_tgsi_instruction *call_inst;
+   ir_function_signature *sig = ir->get_callee();
+   function_entry *entry = get_function_signature(sig);
+   int i;
+
+   /* Process in parameters. */
+   exec_list_iterator sig_iter = sig->parameters.iterator();
+   foreach_iter(exec_list_iterator, iter, *ir) {
+      ir_rvalue *param_rval = (ir_rvalue *)iter.get();
+      ir_variable *param = (ir_variable *)sig_iter.get();
+
+      if (param->mode == ir_var_in ||
+          param->mode == ir_var_inout) {
+         variable_storage *storage = find_variable_storage(param);
+         assert(storage);
+
+         param_rval->accept(this);
+         st_src_reg r = this->result;
+
+         st_dst_reg l;
+         l.file = storage->file;
+         l.index = storage->index;
+         l.reladdr = NULL;
+         l.writemask = WRITEMASK_XYZW;
+         l.cond_mask = COND_TR;
+
+         for (i = 0; i < type_size(param->type); i++) {
+            emit(ir, OPCODE_MOV, l, r);
+            l.index++;
+            r.index++;
+         }
+      }
+
+      sig_iter.next();
+   }
+   assert(!sig_iter.has_next());
+
+   /* Emit call instruction */
+   call_inst = emit(ir, OPCODE_CAL);
+   call_inst->function = entry;
+
+   /* Process out parameters. */
+   sig_iter = sig->parameters.iterator();
+   foreach_iter(exec_list_iterator, iter, *ir) {
+      ir_rvalue *param_rval = (ir_rvalue *)iter.get();
+      ir_variable *param = (ir_variable *)sig_iter.get();
+
+      if (param->mode == ir_var_out ||
+          param->mode == ir_var_inout) {
+         variable_storage *storage = find_variable_storage(param);
+         assert(storage);
+
+         st_src_reg r;
+         r.file = storage->file;
+         r.index = storage->index;
+         r.reladdr = NULL;
+         r.swizzle = SWIZZLE_NOOP;
+         r.negate = 0;
+
+         param_rval->accept(this);
+         st_dst_reg l = st_dst_reg(this->result);
+
+         for (i = 0; i < type_size(param->type); i++) {
+            emit(ir, OPCODE_MOV, l, r);
+            l.index++;
+            r.index++;
+         }
+      }
+
+      sig_iter.next();
+   }
+   assert(!sig_iter.has_next());
+
+   /* Process return value. */
+   this->result = entry->return_reg;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_texture *ir)
+{
+   st_src_reg result_src, coord, lod_info, projector, dx, dy;
+   st_dst_reg result_dst, coord_dst;
+   glsl_to_tgsi_instruction *inst = NULL;
+   prog_opcode opcode = OPCODE_NOP;
+
+   ir->coordinate->accept(this);
+
+   /* Put our coords in a temp.  We'll need to modify them for shadow,
+    * projection, or LOD, so the only case we'd use it as is is if
+    * we're doing plain old texturing.  Mesa IR optimization should
+    * handle cleaning up our mess in that case.
+    */
+   coord = get_temp(glsl_type::vec4_type);
+   coord_dst = st_dst_reg(coord);
+   emit(ir, OPCODE_MOV, coord_dst, this->result);
+
+   if (ir->projector) {
+      ir->projector->accept(this);
+      projector = this->result;
+   }
+
+   /* Storage for our result.  Ideally for an assignment we'd be using
+    * the actual storage for the result here, instead.
+    */
+   result_src = get_temp(glsl_type::vec4_type);
+   result_dst = st_dst_reg(result_src);
+
+   switch (ir->op) {
+   case ir_tex:
+      opcode = OPCODE_TEX;
+      break;
+   case ir_txb:
+      opcode = OPCODE_TXB;
+      ir->lod_info.bias->accept(this);
+      lod_info = this->result;
+      break;
+   case ir_txl:
+      opcode = OPCODE_TXL;
+      ir->lod_info.lod->accept(this);
+      lod_info = this->result;
+      break;
+   case ir_txd:
+      opcode = OPCODE_TXD;
+      ir->lod_info.grad.dPdx->accept(this);
+      dx = this->result;
+      ir->lod_info.grad.dPdy->accept(this);
+      dy = this->result;
+      break;
+   case ir_txf: // TODO: use TGSI_OPCODE_TXF here
+      assert(!"GLSL 1.30 features unsupported");
+      break;
+   }
+
+   if (ir->projector) {
+      if (opcode == OPCODE_TEX) {
+         /* Slot the projector in as the last component of the coord. */
+         coord_dst.writemask = WRITEMASK_W;
+         emit(ir, OPCODE_MOV, coord_dst, projector);
+         coord_dst.writemask = WRITEMASK_XYZW;
+         opcode = OPCODE_TXP;
+      } else {
+         st_src_reg coord_w = coord;
+         coord_w.swizzle = SWIZZLE_WWWW;
+
+         /* For the other TEX opcodes there's no projective version
+          * since the last slot is taken up by lod info.  Do the
+          * projective divide now.
+          */
+         coord_dst.writemask = WRITEMASK_W;
+         emit(ir, OPCODE_RCP, coord_dst, projector);
+
+         /* In the case where we have to project the coordinates "by hand,"
+          * the shadow comparitor value must also be projected.
+          */
+         st_src_reg tmp_src = coord;
+         if (ir->shadow_comparitor) {
+            /* Slot the shadow value in as the second to last component of the
+             * coord.
+             */
+            ir->shadow_comparitor->accept(this);
+
+            tmp_src = get_temp(glsl_type::vec4_type);
+            st_dst_reg tmp_dst = st_dst_reg(tmp_src);
+
+            tmp_dst.writemask = WRITEMASK_Z;
+            emit(ir, OPCODE_MOV, tmp_dst, this->result);
+
+            tmp_dst.writemask = WRITEMASK_XY;
+            emit(ir, OPCODE_MOV, tmp_dst, coord);
+         }
+
+         coord_dst.writemask = WRITEMASK_XYZ;
+         emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w);
+
+         coord_dst.writemask = WRITEMASK_XYZW;
+         coord.swizzle = SWIZZLE_XYZW;
+      }
+   }
+
+   /* If projection is done and the opcode is not OPCODE_TXP, then the shadow
+    * comparitor was put in the correct place (and projected) by the code,
+    * above, that handles by-hand projection.
+    */
+   if (ir->shadow_comparitor && (!ir->projector || opcode == OPCODE_TXP)) {
+      /* Slot the shadow value in as the second to last component of the
+       * coord.
+       */
+      ir->shadow_comparitor->accept(this);
+      coord_dst.writemask = WRITEMASK_Z;
+      emit(ir, OPCODE_MOV, coord_dst, this->result);
+      coord_dst.writemask = WRITEMASK_XYZW;
+   }
+
+   if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) {
+      /* Mesa IR stores lod or lod bias in the last channel of the coords. */
+      coord_dst.writemask = WRITEMASK_W;
+      emit(ir, OPCODE_MOV, coord_dst, lod_info);
+      coord_dst.writemask = WRITEMASK_XYZW;
+   }
+
+   if (opcode == OPCODE_TXD)
+      inst = emit(ir, opcode, result_dst, coord, dx, dy);
+   else
+      inst = emit(ir, opcode, result_dst, coord);
+
+   if (ir->shadow_comparitor)
+      inst->tex_shadow = GL_TRUE;
+
+   inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
+        					   this->shader_program,
+        					   this->prog);
+
+   const glsl_type *sampler_type = ir->sampler->type;
+
+   switch (sampler_type->sampler_dimensionality) {
+   case GLSL_SAMPLER_DIM_1D:
+      inst->tex_target = (sampler_type->sampler_array)
+         ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
+      break;
+   case GLSL_SAMPLER_DIM_2D:
+      inst->tex_target = (sampler_type->sampler_array)
+         ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
+      break;
+   case GLSL_SAMPLER_DIM_3D:
+      inst->tex_target = TEXTURE_3D_INDEX;
+      break;
+   case GLSL_SAMPLER_DIM_CUBE:
+      inst->tex_target = TEXTURE_CUBE_INDEX;
+      break;
+   case GLSL_SAMPLER_DIM_RECT:
+      inst->tex_target = TEXTURE_RECT_INDEX;
+      break;
+   case GLSL_SAMPLER_DIM_BUF:
+      assert(!"FINISHME: Implement ARB_texture_buffer_object");
+      break;
+   default:
+      assert(!"Should not get here.");
+   }
+
+   this->result = result_src;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_return *ir)
+{
+   if (ir->get_value()) {
+      st_dst_reg l;
+      int i;
+
+      assert(current_function);
+
+      ir->get_value()->accept(this);
+      st_src_reg r = this->result;
+
+      l = st_dst_reg(current_function->return_reg);
+
+      for (i = 0; i < type_size(current_function->sig->return_type); i++) {
+         emit(ir, OPCODE_MOV, l, r);
+         l.index++;
+         r.index++;
+      }
+   }
+
+   emit(ir, OPCODE_RET);
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_discard *ir)
+{
+   struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
+
+   if (ir->condition) {
+      ir->condition->accept(this);
+      this->result.negate = ~this->result.negate;
+      emit(ir, OPCODE_KIL, undef_dst, this->result);
+   } else {
+      emit(ir, OPCODE_KIL_NV);
+   }
+
+   fp->UsesKill = GL_TRUE;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_if *ir)
+{
+   glsl_to_tgsi_instruction *cond_inst, *if_inst, *else_inst = NULL;
+   glsl_to_tgsi_instruction *prev_inst;
+
+   prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+
+   ir->condition->accept(this);
+   assert(this->result.file != PROGRAM_UNDEFINED);
+
+   if (this->options->EmitCondCodes) {
+      cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+
+      /* See if we actually generated any instruction for generating
+       * the condition.  If not, then cook up a move to a temp so we
+       * have something to set cond_update on.
+       */
+      if (cond_inst == prev_inst) {
+         st_src_reg temp = get_temp(glsl_type::bool_type);
+         cond_inst = emit(ir->condition, OPCODE_MOV, st_dst_reg(temp), result);
+      }
+      cond_inst->cond_update = GL_TRUE;
+
+      if_inst = emit(ir->condition, OPCODE_IF);
+      if_inst->dst.cond_mask = COND_NE;
+   } else {
+      if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result);
+   }
+
+   this->instructions.push_tail(if_inst);
+
+   visit_exec_list(&ir->then_instructions, this);
+
+   if (!ir->else_instructions.is_empty()) {
+      else_inst = emit(ir->condition, OPCODE_ELSE);
+      visit_exec_list(&ir->else_instructions, this);
+   }
+
+   if_inst = emit(ir->condition, OPCODE_ENDIF);
+}
+
+glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
+{
+   result.file = PROGRAM_UNDEFINED;
+   next_temp = 1;
+   next_signature_id = 1;
+   current_function = NULL;
+   num_address_regs = 0;
+   indirect_addr_temps = false;
+   indirect_addr_consts = false;
+   mem_ctx = ralloc_context(NULL);
+}
+
+glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
+{
+   ralloc_free(mem_ctx);
+}
+
+extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
+{
+   delete v;
+}
+
+static struct prog_src_register
+mesa_st_src_reg_from_ir_st_src_reg(st_src_reg reg)
+{
+   struct prog_src_register mesa_reg;
+
+   mesa_reg.File = reg.file;
+   assert(reg.index < (1 << INST_INDEX_BITS));
+   mesa_reg.Index = reg.index;
+   mesa_reg.Swizzle = reg.swizzle;
+   mesa_reg.RelAddr = reg.reladdr != NULL;
+   mesa_reg.Negate = reg.negate;
+   mesa_reg.Abs = 0;
+   mesa_reg.HasIndex2 = GL_FALSE;
+   mesa_reg.RelAddr2 = 0;
+   mesa_reg.Index2 = 0;
+
+   return mesa_reg;
+}
+
+static void
+set_branchtargets(glsl_to_tgsi_visitor *v,
+        	  struct prog_instruction *mesa_instructions,
+        	  int num_instructions)
+{
+   int if_count = 0, loop_count = 0;
+   int *if_stack, *loop_stack;
+   int if_stack_pos = 0, loop_stack_pos = 0;
+   int i, j;
+
+   for (i = 0; i < num_instructions; i++) {
+      switch (mesa_instructions[i].Opcode) {
+      case OPCODE_IF:
+         if_count++;
+         break;
+      case OPCODE_BGNLOOP:
+         loop_count++;
+         break;
+      case OPCODE_BRK:
+      case OPCODE_CONT:
+         mesa_instructions[i].BranchTarget = -1;
+         break;
+      default:
+         break;
+      }
+   }
+
+   if_stack = rzalloc_array(v->mem_ctx, int, if_count);
+   loop_stack = rzalloc_array(v->mem_ctx, int, loop_count);
+
+   for (i = 0; i < num_instructions; i++) {
+      switch (mesa_instructions[i].Opcode) {
+      case OPCODE_IF:
+         if_stack[if_stack_pos] = i;
+         if_stack_pos++;
+         break;
+      case OPCODE_ELSE:
+         mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
+         if_stack[if_stack_pos - 1] = i;
+         break;
+      case OPCODE_ENDIF:
+         mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
+         if_stack_pos--;
+         break;
+      case OPCODE_BGNLOOP:
+         loop_stack[loop_stack_pos] = i;
+         loop_stack_pos++;
+         break;
+      case OPCODE_ENDLOOP:
+         loop_stack_pos--;
+         /* Rewrite any breaks/conts at this nesting level (haven't
+          * already had a BranchTarget assigned) to point to the end
+          * of the loop.
+          */
+         for (j = loop_stack[loop_stack_pos]; j < i; j++) {
+            if (mesa_instructions[j].Opcode == OPCODE_BRK ||
+        	mesa_instructions[j].Opcode == OPCODE_CONT) {
+               if (mesa_instructions[j].BranchTarget == -1) {
+        	  mesa_instructions[j].BranchTarget = i;
+               }
+            }
+         }
+         /* The loop ends point at each other. */
+         mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
+         mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
+         break;
+      case OPCODE_CAL:
+         foreach_iter(exec_list_iterator, iter, v->function_signatures) {
+            function_entry *entry = (function_entry *)iter.get();
+
+            if (entry->sig_id == mesa_instructions[i].BranchTarget) {
+               mesa_instructions[i].BranchTarget = entry->inst;
+               break;
+            }
+         }
+         break;
+      default:
+         break;
+      }
+   }
+}
+
+static void
+print_program(struct prog_instruction *mesa_instructions,
+              ir_instruction **mesa_instruction_annotation,
+              int num_instructions)
+{
+   /*ir_instruction *last_ir = NULL;*/
+   int i;
+   int indent = 0;
+
+   for (i = 0; i < num_instructions; i++) {
+      struct prog_instruction *mesa_inst = mesa_instructions + i;
+
+      fprintf(stdout, "%3d: ", i);
+
+#if 0
+/* Disable this for now, since printing GLSL IR along with its corresponding 
+ * Mesa IR makes the Mesa IR unreadable. */
+      ir_instruction *ir = mesa_instruction_annotation[i];
+      if (last_ir != ir && ir) {
+         int j;
+
+         for (j = 0; j < indent; j++) {
+            fprintf(stdout, " ");
+         }
+         ir->print();
+         printf("\n");
+         last_ir = ir;
+
+         fprintf(stdout, "     "); /* line number spacing. */
+      }
+#endif
+
+      indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent,
+        				    PROG_PRINT_DEBUG, NULL);
+   }
+}
+
+
+/**
+ * Count resources used by the given gpu program (number of texture
+ * samplers, etc).
+ */
+static void
+count_resources(struct gl_program *prog)
+{
+   unsigned int i;
+
+   prog->SamplersUsed = 0;
+
+   for (i = 0; i < prog->NumInstructions; i++) {
+      struct prog_instruction *inst = &prog->Instructions[i];
+
+      if (_mesa_is_tex_instruction(inst->Opcode)) {
+         prog->SamplerTargets[inst->TexSrcUnit] =
+            (gl_texture_index)inst->TexSrcTarget;
+         prog->SamplersUsed |= 1 << inst->TexSrcUnit;
+         if (inst->TexShadow) {
+            prog->ShadowSamplers |= 1 << inst->TexSrcUnit;
+         }
+      }
+   }
+
+   _mesa_update_shader_textures_used(prog);
+}
+
+
+/**
+ * Check if the given vertex/fragment/shader program is within the
+ * resource limits of the context (number of texture units, etc).
+ * If any of those checks fail, record a linker error.
+ *
+ * XXX more checks are needed...
+ */
+static void
+check_resources(const struct gl_context *ctx,
+                struct gl_shader_program *shader_program,
+                struct gl_program *prog)
+{
+   switch (prog->Target) {
+   case GL_VERTEX_PROGRAM_ARB:
+      if (_mesa_bitcount(prog->SamplersUsed) >
+          ctx->Const.MaxVertexTextureImageUnits) {
+         fail_link(shader_program, "Too many vertex shader texture samplers");
+      }
+      if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
+         fail_link(shader_program, "Too many vertex shader constants");
+      }
+      break;
+   case MESA_GEOMETRY_PROGRAM:
+      if (_mesa_bitcount(prog->SamplersUsed) >
+          ctx->Const.MaxGeometryTextureImageUnits) {
+         fail_link(shader_program, "Too many geometry shader texture samplers");
+      }
+      if (prog->Parameters->NumParameters >
+          MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) {
+         fail_link(shader_program, "Too many geometry shader constants");
+      }
+      break;
+   case GL_FRAGMENT_PROGRAM_ARB:
+      if (_mesa_bitcount(prog->SamplersUsed) >
+          ctx->Const.MaxTextureImageUnits) {
+         fail_link(shader_program, "Too many fragment shader texture samplers");
+      }
+      if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
+         fail_link(shader_program, "Too many fragment shader constants");
+      }
+      break;
+   default:
+      _mesa_problem(ctx, "unexpected program type in check_resources()");
+   }
+}
+
+
+
+struct uniform_sort {
+   struct gl_uniform *u;
+   int pos;
+};
+
+/* The shader_program->Uniforms list is almost sorted in increasing
+ * uniform->{Frag,Vert}Pos locations, but not quite when there are
+ * uniforms shared between targets.  We need to add parameters in
+ * increasing order for the targets.
+ */
+static int
+sort_uniforms(const void *a, const void *b)
+{
+   struct uniform_sort *u1 = (struct uniform_sort *)a;
+   struct uniform_sort *u2 = (struct uniform_sort *)b;
+
+   return u1->pos - u2->pos;
+}
+
+/* Add the uniforms to the parameters.  The linker chose locations
+ * in our parameters lists (which weren't created yet), which the
+ * uniforms code will use to poke values into our parameters list
+ * when uniforms are updated.
+ */
+static void
+add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
+        			struct gl_shader *shader,
+        			struct gl_program *prog)
+{
+   unsigned int i;
+   unsigned int next_sampler = 0, num_uniforms = 0;
+   struct uniform_sort *sorted_uniforms;
+
+   sorted_uniforms = ralloc_array(NULL, struct uniform_sort,
+        			  shader_program->Uniforms->NumUniforms);
+
+   for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) {
+      struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i;
+      int parameter_index = -1;
+
+      switch (shader->Type) {
+      case GL_VERTEX_SHADER:
+         parameter_index = uniform->VertPos;
+         break;
+      case GL_FRAGMENT_SHADER:
+         parameter_index = uniform->FragPos;
+         break;
+      case GL_GEOMETRY_SHADER:
+         parameter_index = uniform->GeomPos;
+         break;
+      }
+
+      /* Only add uniforms used in our target. */
+      if (parameter_index != -1) {
+         sorted_uniforms[num_uniforms].pos = parameter_index;
+         sorted_uniforms[num_uniforms].u = uniform;
+         num_uniforms++;
+      }
+   }
+
+   qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort),
+         sort_uniforms);
+
+   for (i = 0; i < num_uniforms; i++) {
+      struct gl_uniform *uniform = sorted_uniforms[i].u;
+      int parameter_index = sorted_uniforms[i].pos;
+      const glsl_type *type = uniform->Type;
+      unsigned int size;
+
+      if (type->is_vector() ||
+          type->is_scalar()) {
+         size = type->vector_elements;
+      } else {
+         size = type_size(type) * 4;
+      }
+
+      gl_register_file file;
+      if (type->is_sampler() ||
+          (type->is_array() && type->fields.array->is_sampler())) {
+         file = PROGRAM_SAMPLER;
+      } else {
+         file = PROGRAM_UNIFORM;
+      }
+
+      GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1,
+        					 uniform->Name);
+
+      if (index < 0) {
+         index = _mesa_add_parameter(prog->Parameters, file,
+        			     uniform->Name, size, type->gl_type,
+        			     NULL, NULL, 0x0);
+
+         /* Sampler uniform values are stored in prog->SamplerUnits,
+          * and the entry in that array is selected by this index we
+          * store in ParameterValues[].
+          */
+         if (file == PROGRAM_SAMPLER) {
+            for (unsigned int j = 0; j < size / 4; j++)
+               prog->Parameters->ParameterValues[index + j][0] = next_sampler++;
+         }
+
+         /* The location chosen in the Parameters list here (returned
+          * from _mesa_add_uniform) has to match what the linker chose.
+          */
+         if (index != parameter_index) {
+            fail_link(shader_program, "Allocation of uniform `%s' to target "
+        	      "failed (%d vs %d)\n",
+        	      uniform->Name, index, parameter_index);
+         }
+      }
+   }
+
+   ralloc_free(sorted_uniforms);
+}
+
+static void
+set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
+        		struct gl_shader_program *shader_program,
+        		const char *name, const glsl_type *type,
+        		ir_constant *val)
+{
+   if (type->is_record()) {
+      ir_constant *field_constant;
+
+      field_constant = (ir_constant *)val->components.get_head();
+
+      for (unsigned int i = 0; i < type->length; i++) {
+         const glsl_type *field_type = type->fields.structure[i].type;
+         const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
+        				    type->fields.structure[i].name);
+         set_uniform_initializer(ctx, mem_ctx, shader_program, field_name,
+        			 field_type, field_constant);
+         field_constant = (ir_constant *)field_constant->next;
+      }
+      return;
+   }
+
+   int loc = _mesa_get_uniform_location(ctx, shader_program, name);
+
+   if (loc == -1) {
+      fail_link(shader_program,
+        	"Couldn't find uniform for initializer %s\n", name);
+      return;
+   }
+
+   for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) {
+      ir_constant *element;
+      const glsl_type *element_type;
+      if (type->is_array()) {
+         element = val->array_elements[i];
+         element_type = type->fields.array;
+      } else {
+         element = val;
+         element_type = type;
+      }
+
+      void *values;
+
+      if (element_type->base_type == GLSL_TYPE_BOOL) {
+         int *conv = ralloc_array(mem_ctx, int, element_type->components());
+         for (unsigned int j = 0; j < element_type->components(); j++) {
+            conv[j] = element->value.b[j];
+         }
+         values = (void *)conv;
+         element_type = glsl_type::get_instance(GLSL_TYPE_INT,
+        					element_type->vector_elements,
+        					1);
+      } else {
+         values = &element->value;
+      }
+
+      if (element_type->is_matrix()) {
+         _mesa_uniform_matrix(ctx, shader_program,
+        		      element_type->matrix_columns,
+        		      element_type->vector_elements,
+        		      loc, 1, GL_FALSE, (GLfloat *)values);
+         loc += element_type->matrix_columns;
+      } else {
+         _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
+        	       values, element_type->gl_type);
+         loc += type_size(element_type);
+      }
+   }
+}
+
+static void
+set_uniform_initializers(struct gl_context *ctx,
+        		 struct gl_shader_program *shader_program)
+{
+   void *mem_ctx = NULL;
+
+   for (unsigned int i = 0; i < MESA_SHADER_TYPES; i++) {
+      struct gl_shader *shader = shader_program->_LinkedShaders[i];
+
+      if (shader == NULL)
+         continue;
+
+      foreach_iter(exec_list_iterator, iter, *shader->ir) {
+         ir_instruction *ir = (ir_instruction *)iter.get();
+         ir_variable *var = ir->as_variable();
+
+         if (!var || var->mode != ir_var_uniform || !var->constant_value)
+            continue;
+
+         if (!mem_ctx)
+            mem_ctx = ralloc_context(NULL);
+
+         set_uniform_initializer(ctx, mem_ctx, shader_program, var->name,
+        			 var->type, var->constant_value);
+      }
+   }
+
+   ralloc_free(mem_ctx);
+}
+
+/* Replaces all references to a temporary register index with another index. */
+void
+glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
+{
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      unsigned j;
+      
+      for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) {
+         if (inst->src[j].file == PROGRAM_TEMPORARY && 
+             inst->src[j].index == index) {
+            inst->src[j].index = new_index;
+         }
+      }
+      
+      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
+         inst->dst.index = new_index;
+      }
+   }
+}
+
+int
+glsl_to_tgsi_visitor::get_first_temp_read(int index)
+{
+   int depth = 0; /* loop depth */
+   int loop_start = -1; /* index of the first active BGNLOOP (if any) */
+   unsigned i = 0, j;
+   
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      
+      for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) {
+         if (inst->src[j].file == PROGRAM_TEMPORARY && 
+             inst->src[j].index == index) {
+            return (depth == 0) ? i : loop_start;
+         }
+      }
+      
+      if (inst->op == OPCODE_BGNLOOP) {
+         if(depth++ == 0)
+            loop_start = i;
+      } else if (inst->op == OPCODE_ENDLOOP) {
+         if (--depth == 0)
+            loop_start = -1;
+      }
+      assert(depth >= 0);
+      
+      i++;
+   }
+   
+   return -1;
+}
+
+int
+glsl_to_tgsi_visitor::get_first_temp_write(int index)
+{
+   int depth = 0; /* loop depth */
+   int loop_start = -1; /* index of the first active BGNLOOP (if any) */
+   int i = 0;
+   
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      
+      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
+         return (depth == 0) ? i : loop_start;
+      }
+      
+      if (inst->op == OPCODE_BGNLOOP) {
+         if(depth++ == 0)
+            loop_start = i;
+      } else if (inst->op == OPCODE_ENDLOOP) {
+         if (--depth == 0)
+            loop_start = -1;
+      }
+      assert(depth >= 0);
+      
+      i++;
+   }
+   
+   return -1;
+}
+
+int
+glsl_to_tgsi_visitor::get_last_temp_read(int index)
+{
+   int depth = 0; /* loop depth */
+   int last = -1; /* index of last instruction that reads the temporary */
+   unsigned i = 0, j;
+   
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      
+      for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) {
+         if (inst->src[j].file == PROGRAM_TEMPORARY && 
+             inst->src[j].index == index) {
+            last = (depth == 0) ? i : -2;
+         }
+      }
+      
+      if (inst->op == OPCODE_BGNLOOP)
+         depth++;
+      else if (inst->op == OPCODE_ENDLOOP)
+         if (--depth == 0 && last == -2)
+            last = i;
+      assert(depth >= 0);
+      
+      i++;
+   }
+   
+   assert(last >= -1);
+   return last;
+}
+
+int
+glsl_to_tgsi_visitor::get_last_temp_write(int index)
+{
+   int depth = 0; /* loop depth */
+   int last = -1; /* index of last instruction that writes to the temporary */
+   int i = 0;
+   
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      
+      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index)
+         last = (depth == 0) ? i : -2;
+      
+      if (inst->op == OPCODE_BGNLOOP)
+         depth++;
+      else if (inst->op == OPCODE_ENDLOOP)
+         if (--depth == 0 && last == -2)
+            last = i;
+      assert(depth >= 0);
+      
+      i++;
+   }
+   
+   assert(last >= -1);
+   return last;
+}
+
+/*
+ * On a basic block basis, tracks available PROGRAM_TEMPORARY register
+ * channels for copy propagation and updates following instructions to
+ * use the original versions.
+ *
+ * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
+ * will occur.  As an example, a TXP production before this pass:
+ *
+ * 0: MOV TEMP[1], INPUT[4].xyyy;
+ * 1: MOV TEMP[1].w, INPUT[4].wwww;
+ * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
+ *
+ * and after:
+ *
+ * 0: MOV TEMP[1], INPUT[4].xyyy;
+ * 1: MOV TEMP[1].w, INPUT[4].wwww;
+ * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
+ *
+ * which allows for dead code elimination on TEMP[1]'s writes.
+ */
+void
+glsl_to_tgsi_visitor::copy_propagate(void)
+{
+   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
+        					    glsl_to_tgsi_instruction *,
+        					    this->next_temp * 4);
+   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
+   int level = 0;
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      assert(inst->dst.file != PROGRAM_TEMPORARY
+             || inst->dst.index < this->next_temp);
+
+      /* First, do any copy propagation possible into the src regs. */
+      for (int r = 0; r < 3; r++) {
+         glsl_to_tgsi_instruction *first = NULL;
+         bool good = true;
+         int acp_base = inst->src[r].index * 4;
+
+         if (inst->src[r].file != PROGRAM_TEMPORARY ||
+             inst->src[r].reladdr)
+            continue;
+
+         /* See if we can find entries in the ACP consisting of MOVs
+          * from the same src register for all the swizzled channels
+          * of this src register reference.
+          */
+         for (int i = 0; i < 4; i++) {
+            int src_chan = GET_SWZ(inst->src[r].swizzle, i);
+            glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
+
+            if (!copy_chan) {
+               good = false;
+               break;
+            }
+
+            assert(acp_level[acp_base + src_chan] <= level);
+
+            if (!first) {
+               first = copy_chan;
+            } else {
+               if (first->src[0].file != copy_chan->src[0].file ||
+        	   first->src[0].index != copy_chan->src[0].index) {
+        	  good = false;
+        	  break;
+               }
+            }
+         }
+
+         if (good) {
+            /* We've now validated that we can copy-propagate to
+             * replace this src register reference.  Do it.
+             */
+            inst->src[r].file = first->src[0].file;
+            inst->src[r].index = first->src[0].index;
+
+            int swizzle = 0;
+            for (int i = 0; i < 4; i++) {
+               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
+               glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
+               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
+        		   (3 * i));
+            }
+            inst->src[r].swizzle = swizzle;
+         }
+      }
+
+      switch (inst->op) {
+      case OPCODE_BGNLOOP:
+      case OPCODE_ENDLOOP:
+         /* End of a basic block, clear the ACP entirely. */
+         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
+         break;
+
+      case OPCODE_IF:
+         ++level;
+         break;
+
+      case OPCODE_ENDIF:
+      case OPCODE_ELSE:
+         /* Clear all channels written inside the block from the ACP, but
+          * leaving those that were not touched.
+          */
+         for (int r = 0; r < this->next_temp; r++) {
+            for (int c = 0; c < 4; c++) {
+               if (!acp[4 * r + c])
+        	  continue;
+
+               if (acp_level[4 * r + c] >= level)
+        	  acp[4 * r + c] = NULL;
+            }
+         }
+         if (inst->op == OPCODE_ENDIF)
+            --level;
+         break;
+
+      default:
+         /* Continuing the block, clear any written channels from
+          * the ACP.
+          */
+         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
+            /* Any temporary might be written, so no copy propagation
+             * across this instruction.
+             */
+            memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
+         } else if (inst->dst.file == PROGRAM_OUTPUT &&
+        	    inst->dst.reladdr) {
+            /* Any output might be written, so no copy propagation
+             * from outputs across this instruction.
+             */
+            for (int r = 0; r < this->next_temp; r++) {
+               for (int c = 0; c < 4; c++) {
+        	  if (!acp[4 * r + c])
+        	     continue;
+
+        	  if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
+        	     acp[4 * r + c] = NULL;
+               }
+            }
+         } else if (inst->dst.file == PROGRAM_TEMPORARY ||
+        	    inst->dst.file == PROGRAM_OUTPUT) {
+            /* Clear where it's used as dst. */
+            if (inst->dst.file == PROGRAM_TEMPORARY) {
+               for (int c = 0; c < 4; c++) {
+        	  if (inst->dst.writemask & (1 << c)) {
+        	     acp[4 * inst->dst.index + c] = NULL;
+        	  }
+               }
+            }
+
+            /* Clear where it's used as src. */
+            for (int r = 0; r < this->next_temp; r++) {
+               for (int c = 0; c < 4; c++) {
+        	  if (!acp[4 * r + c])
+        	     continue;
+
+        	  int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
+
+        	  if (acp[4 * r + c]->src[0].file == inst->dst.file &&
+        	      acp[4 * r + c]->src[0].index == inst->dst.index &&
+        	      inst->dst.writemask & (1 << src_chan))
+        	  {
+        	     acp[4 * r + c] = NULL;
+        	  }
+               }
+            }
+         }
+         break;
+      }
+
+      /* If this is a copy, add it to the ACP. */
+      if (inst->op == OPCODE_MOV &&
+          inst->dst.file == PROGRAM_TEMPORARY &&
+          !inst->dst.reladdr &&
+          !inst->saturate &&
+          !inst->src[0].reladdr &&
+          !inst->src[0].negate) {
+         for (int i = 0; i < 4; i++) {
+            if (inst->dst.writemask & (1 << i)) {
+               acp[4 * inst->dst.index + i] = inst;
+               acp_level[4 * inst->dst.index + i] = level;
+            }
+         }
+      }
+   }
+
+   ralloc_free(acp_level);
+   ralloc_free(acp);
+}
+
+/*
+ * Tracks available PROGRAM_TEMPORARY registers for dead code elimination.
+ *
+ * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
+ * will occur.  As an example, a TXP production after copy propagation but 
+ * before this pass:
+ *
+ * 0: MOV TEMP[1], INPUT[4].xyyy;
+ * 1: MOV TEMP[1].w, INPUT[4].wwww;
+ * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
+ *
+ * and after this pass:
+ *
+ * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
+ * 
+ * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB)
+ * FIXME: doesn't eliminate all dead code inside of loops; it steps around them
+ */
+void
+glsl_to_tgsi_visitor::eliminate_dead_code(void)
+{
+   int i;
+   
+   for (i=0; i < this->next_temp; i++) {
+      int last_read = get_last_temp_read(i);
+      int j = 0;
+      
+      foreach_iter(exec_list_iterator, iter, this->instructions) {
+         glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i &&
+             j > last_read)
+         {
+            iter.remove();
+            delete inst;
+         }
+         
+         j++;
+      }
+   }
+}
+
+/* Merges temporary registers together where possible to reduce the number of 
+ * registers needed to run a program.
+ * 
+ * Produces optimal code only after copy propagation and dead code elimination 
+ * have been run. */
+void
+glsl_to_tgsi_visitor::merge_registers(void)
+{
+   int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
+   int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
+   int i, j;
+   
+   /* Read the indices of the last read and first write to each temp register
+    * into an array so that we don't have to traverse the instruction list as 
+    * much. */
+   for (i=0; i < this->next_temp; i++) {
+      last_reads[i] = get_last_temp_read(i);
+      first_writes[i] = get_first_temp_write(i);
+   }
+   
+   /* Start looking for registers with non-overlapping usages that can be 
+    * merged together. */
+   for (i=0; i < this->next_temp - 1; i++) {
+      /* Don't touch unused registers. */
+      if (last_reads[i] < 0 || first_writes[i] < 0) continue;
+      
+      for (j=i+1; j < this->next_temp; j++) {
+         /* Don't touch unused registers. */
+         if (last_reads[j] < 0 || first_writes[j] < 0) continue;
+         
+         /* We can merge the two registers if the first write to j is after or 
+          * in the same instruction as the last read from i.  Note that the 
+          * register at index i will always be used earlier or at the same time 
+          * as the register at index j. */
+         assert(first_writes[i] <= first_writes[j]);
+         if (last_reads[i] <= first_writes[j]) {
+            rename_temp_register(j, i); /* Replace all references to j with i.*/
+            
+            /* Update the first_writes and last_reads arrays with the new 
+             * values for the merged register index, and mark the newly unused 
+             * register index as such. */
+            last_reads[i] = last_reads[j];
+            first_writes[j] = -1;
+            last_reads[j] = -1;
+         }
+      }
+   }
+   
+   ralloc_free(last_reads);
+   ralloc_free(first_writes);
+}
+
+/* Reassign indices to temporary registers by reusing unused indices created 
+ * by optimization passes. */
+void
+glsl_to_tgsi_visitor::renumber_registers(void)
+{
+   int i = 0;
+   int new_index = 0;
+   
+   for (i=0; i < this->next_temp; i++) {
+      if (get_first_temp_read(i) < 0) continue;
+      if (i != new_index)
+         rename_temp_register(i, new_index);
+      new_index++;
+   }
+   
+   this->next_temp = new_index;
+}
+
+/* ------------------------- TGSI conversion stuff -------------------------- */
+struct label {
+   unsigned branch_target;
+   unsigned token;
+};
+
+/**
+ * Intermediate state used during shader translation.
+ */
+struct st_translate {
+   struct ureg_program *ureg;
+
+   struct ureg_dst temps[MAX_PROGRAM_TEMPS];
+   struct ureg_src *constants;
+   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
+   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
+   struct ureg_dst address[1];
+   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
+   struct ureg_src systemValues[SYSTEM_VALUE_MAX];
+
+   /* Extra info for handling point size clamping in vertex shader */
+   struct ureg_dst pointSizeResult; /**< Actual point size output register */
+   struct ureg_src pointSizeConst;  /**< Point size range constant register */
+   GLint pointSizeOutIndex;         /**< Temp point size output register */
+   GLboolean prevInstWrotePointSize;
+
+   const GLuint *inputMapping;
+   const GLuint *outputMapping;
+
+   /* For every instruction that contains a label (eg CALL), keep
+    * details so that we can go back afterwards and emit the correct
+    * tgsi instruction number for each label.
+    */
+   struct label *labels;
+   unsigned labels_size;
+   unsigned labels_count;
+
+   /* Keep a record of the tgsi instruction number that each mesa
+    * instruction starts at, will be used to fix up labels after
+    * translation.
+    */
+   unsigned *insn;
+   unsigned insn_size;
+   unsigned insn_count;
+
+   unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
+
+   boolean error;
+};
+
+/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
+static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
+   TGSI_SEMANTIC_FACE,
+   TGSI_SEMANTIC_INSTANCEID
+};
+
+/**
+ * Make note of a branch to a label in the TGSI code.
+ * After we've emitted all instructions, we'll go over the list
+ * of labels built here and patch the TGSI code with the actual
+ * location of each label.
+ */
+static unsigned *get_label( struct st_translate *t,
+                            unsigned branch_target )
+{
+   unsigned i;
+
+   if (t->labels_count + 1 >= t->labels_size) {
+      t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
+      t->labels = (struct label *)realloc(t->labels, 
+                                          t->labels_size * sizeof t->labels[0]);
+      if (t->labels == NULL) {
+         static unsigned dummy;
+         t->error = TRUE;
+         return &dummy;
+      }
+   }
+
+   i = t->labels_count++;
+   t->labels[i].branch_target = branch_target;
+   return &t->labels[i].token;
+}
+
+/**
+ * Called prior to emitting the TGSI code for each Mesa instruction.
+ * Allocate additional space for instructions if needed.
+ * Update the insn[] array so the next Mesa instruction points to
+ * the next TGSI instruction.
+ */
+static void set_insn_start( struct st_translate *t,
+                            unsigned start )
+{
+   if (t->insn_count + 1 >= t->insn_size) {
+      t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
+      t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof t->insn[0]);
+      if (t->insn == NULL) {
+         t->error = TRUE;
+         return;
+      }
+   }
+
+   t->insn[t->insn_count++] = start;
+}
+
+/**
+ * Map a Mesa dst register to a TGSI ureg_dst register.
+ */
+static struct ureg_dst
+dst_register( struct st_translate *t,
+              gl_register_file file,
+              GLuint index )
+{
+   switch( file ) {
+   case PROGRAM_UNDEFINED:
+      return ureg_dst_undef();
+
+   case PROGRAM_TEMPORARY:
+      if (ureg_dst_is_undef(t->temps[index]))
+         t->temps[index] = ureg_DECL_temporary( t->ureg );
+
+      return t->temps[index];
+
+   case PROGRAM_OUTPUT:
+      if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ)
+         t->prevInstWrotePointSize = GL_TRUE;
+
+      if (t->procType == TGSI_PROCESSOR_VERTEX)
+         assert(index < VERT_RESULT_MAX);
+      else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
+         assert(index < FRAG_RESULT_MAX);
+      else
+         assert(index < GEOM_RESULT_MAX);
+
+      assert(t->outputMapping[index] < Elements(t->outputs));
+
+      return t->outputs[t->outputMapping[index]];
+
+   case PROGRAM_ADDRESS:
+      return t->address[index];
+
+   default:
+      debug_assert( 0 );
+      return ureg_dst_undef();
+   }
+}
+
+/**
+ * Map a Mesa src register to a TGSI ureg_src register.
+ */
+static struct ureg_src
+src_register( struct st_translate *t,
+              gl_register_file file,
+              GLuint index )
+{
+   switch( file ) {
+   case PROGRAM_UNDEFINED:
+      return ureg_src_undef();
+
+   case PROGRAM_TEMPORARY:
+      assert(index >= 0);
+      assert(index < Elements(t->temps));
+      if (ureg_dst_is_undef(t->temps[index]))
+         t->temps[index] = ureg_DECL_temporary( t->ureg );
+      return ureg_src(t->temps[index]);
+
+   case PROGRAM_NAMED_PARAM:
+   case PROGRAM_ENV_PARAM:
+   case PROGRAM_LOCAL_PARAM:
+   case PROGRAM_UNIFORM:
+      assert(index >= 0);
+      return t->constants[index];
+   case PROGRAM_STATE_VAR:
+   case PROGRAM_CONSTANT:       /* ie, immediate */
+      if (index < 0)
+         return ureg_DECL_constant( t->ureg, 0 );
+      else
+         return t->constants[index];
+
+   case PROGRAM_INPUT:
+      assert(t->inputMapping[index] < Elements(t->inputs));
+      return t->inputs[t->inputMapping[index]];
+
+   case PROGRAM_OUTPUT:
+      assert(t->outputMapping[index] < Elements(t->outputs));
+      return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
+
+   case PROGRAM_ADDRESS:
+      return ureg_src(t->address[index]);
+
+   case PROGRAM_SYSTEM_VALUE:
+      assert(index < Elements(t->systemValues));
+      return t->systemValues[index];
+
+   default:
+      debug_assert( 0 );
+      return ureg_src_undef();
+   }
+}
+
+/**
+ * Create a TGSI ureg_dst register from a Mesa dest register.
+ */
+static struct ureg_dst
+translate_dst( struct st_translate *t,
+               const st_dst_reg *dst_reg, //const struct prog_dst_register *DstReg,
+               boolean saturate )
+{
+   struct ureg_dst dst = dst_register( t, 
+                                       dst_reg->file,
+                                       dst_reg->index );
+
+   dst = ureg_writemask( dst, 
+                         dst_reg->writemask );
+   
+   if (saturate)
+      dst = ureg_saturate( dst );
+
+   if (dst_reg->reladdr != NULL)
+      dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) );
+
+   return dst;
+}
+
+/**
+ * Create a TGSI ureg_src register from a Mesa src register.
+ */
+static struct ureg_src
+translate_src( struct st_translate *t,
+               const st_src_reg *src_reg )
+{
+   struct ureg_src src = src_register( t, src_reg->file, src_reg->index );
+
+   src = ureg_swizzle( src,
+                       GET_SWZ( src_reg->swizzle, 0 ) & 0x3,
+                       GET_SWZ( src_reg->swizzle, 1 ) & 0x3,
+                       GET_SWZ( src_reg->swizzle, 2 ) & 0x3,
+                       GET_SWZ( src_reg->swizzle, 3 ) & 0x3);
+
+   if ((src_reg->negate & 0xf) == NEGATE_XYZW)
+      src = ureg_negate(src);
+
+#if 0
+   // src_reg currently does not have an equivalent to SrcReg->Abs in Mesa IR
+   if (src_reg->abs) 
+      src = ureg_abs(src);
+#endif
+
+   if (src_reg->reladdr != NULL) {
+      /* Normally ureg_src_indirect() would be used here, but a stupid compiler 
+       * bug in g++ makes ureg_src_indirect (an inline C function) erroneously 
+       * set the bit for src.Negate.  So we have to do the operation manually
+       * here to work around the compiler's problems. */
+      /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/
+      struct ureg_src addr = ureg_src(t->address[0]);
+      src.Indirect = 1;
+      src.IndirectFile = addr.File;
+      src.IndirectIndex = addr.Index;
+      src.IndirectSwizzle = addr.SwizzleX;
+      
+      if (src_reg->file != PROGRAM_INPUT &&
+          src_reg->file != PROGRAM_OUTPUT) {
+         /* If src_reg->index was negative, it was set to zero in
+          * src_register().  Reassign it now.  But don't do this
+          * for input/output regs since they get remapped while
+          * const buffers don't.
+          */
+         src.Index = src_reg->index;
+      }
+   }
+
+   return src;
+}
+
+static void
+compile_tgsi_instruction(struct st_translate *t, 
+        			     const struct glsl_to_tgsi_instruction *inst)
+{
+   struct ureg_program *ureg = t->ureg;
+   GLuint i;
+   struct ureg_dst dst[1];
+   struct ureg_src src[4];
+   unsigned num_dst;
+   unsigned num_src;
+
+   num_dst = _mesa_num_inst_dst_regs( inst->op );
+   num_src = _mesa_num_inst_src_regs( inst->op );
+
+   if (num_dst) 
+      dst[0] = translate_dst( t, 
+                              &inst->dst,
+                              inst->saturate); // inst->SaturateMode
+
+   for (i = 0; i < num_src; i++) 
+      src[i] = translate_src( t, &inst->src[i] );
+
+   switch( inst->op ) {
+   case OPCODE_SWZ:
+      // TODO: copy emit_swz function from st_mesa_to_tgsi.c
+      //emit_swz( t, dst[0], &inst->src[0] );
+      assert(!"OPCODE_SWZ");
+      return;
+
+   case OPCODE_BGNLOOP:
+   case OPCODE_CAL:
+   case OPCODE_ELSE:
+   case OPCODE_ENDLOOP:
+   case OPCODE_IF:
+      debug_assert(num_dst == 0);
+      ureg_label_insn( ureg,
+                       translate_opcode( inst->op ),
+                       src, num_src,
+                       get_label( t, 
+                                  inst->op == OPCODE_CAL ? inst->function->sig_id : 0 ));
+      return;
+
+   case OPCODE_TEX:
+   case OPCODE_TXB:
+   case OPCODE_TXD:
+   case OPCODE_TXL:
+   case OPCODE_TXP:
+      src[num_src++] = t->samplers[inst->sampler];
+      ureg_tex_insn( ureg,
+                     translate_opcode( inst->op ),
+                     dst, num_dst, 
+                     translate_texture_target( inst->tex_target,
+                                               inst->tex_shadow ),
+                     src, num_src );
+      return;
+
+   case OPCODE_SCS:
+      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
+      ureg_insn( ureg, 
+                 translate_opcode( inst->op ), 
+                 dst, num_dst, 
+                 src, num_src );
+      break;
+
+   case OPCODE_XPD:
+      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
+      ureg_insn( ureg, 
+                 translate_opcode( inst->op ), 
+                 dst, num_dst, 
+                 src, num_src );
+      break;
+
+   case OPCODE_NOISE1:
+   case OPCODE_NOISE2:
+   case OPCODE_NOISE3:
+   case OPCODE_NOISE4:
+      /* At some point, a motivated person could add a better
+       * implementation of noise.  Currently not even the nvidia
+       * binary drivers do anything more than this.  In any case, the
+       * place to do this is in the GL state tracker, not the poor
+       * driver.
+       */
+      ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) );
+      break;
+		 
+   case OPCODE_DDY:
+      // TODO: copy emit_ddy() function from st_mesa_to_tgsi.c
+      assert(!"OPCODE_DDY");
+      //emit_ddy( t, dst[0], &inst->src[0] );
+      break;
+
+   default:
+      ureg_insn( ureg, 
+                 translate_opcode( inst->op ), 
+                 dst, num_dst, 
+                 src, num_src );
+      break;
+   }
+}
+
+/**
+ * Emit the TGSI instructions to adjust the WPOS pixel center convention
+ * Basically, add (adjX, adjY) to the fragment position.
+ */
+static void
+emit_adjusted_wpos( struct st_translate *t,
+                    const struct gl_program *program,
+                    GLfloat adjX, GLfloat adjY)
+{
+   struct ureg_program *ureg = t->ureg;
+   struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
+   struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
+
+   /* Note that we bias X and Y and pass Z and W through unchanged.
+    * The shader might also use gl_FragCoord.w and .z.
+    */
+   ureg_ADD(ureg, wpos_temp, wpos_input,
+            ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f));
+
+   t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
+}
+
+
+/**
+ * Emit the TGSI instructions for inverting the WPOS y coordinate.
+ * This code is unavoidable because it also depends on whether
+ * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
+ */
+static void
+emit_wpos_inversion( struct st_translate *t,
+                     const struct gl_program *program,
+                     boolean invert)
+{
+   struct ureg_program *ureg = t->ureg;
+
+   /* Fragment program uses fragment position input.
+    * Need to replace instances of INPUT[WPOS] with temp T
+    * where T = INPUT[WPOS] by y is inverted.
+    */
+   static const gl_state_index wposTransformState[STATE_LENGTH]
+      = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 
+          (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
+   
+   /* XXX: note we are modifying the incoming shader here!  Need to
+    * do this before emitting the constant decls below, or this
+    * will be missed:
+    */
+   unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
+                                                       wposTransformState);
+
+   struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
+   struct ureg_dst wpos_temp;
+   struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
+
+   /* MOV wpos_temp, input[wpos]
+    */
+   if (wpos_input.File == TGSI_FILE_TEMPORARY)
+      wpos_temp = ureg_dst(wpos_input);
+   else {
+      wpos_temp = ureg_DECL_temporary( ureg );
+      ureg_MOV( ureg, wpos_temp, wpos_input );
+   }
+
+   if (invert) {
+      /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
+       */
+      ureg_MAD( ureg,
+                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
+                wpos_input,
+                ureg_scalar(wpostrans, 0),
+                ureg_scalar(wpostrans, 1));
+   } else {
+      /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
+       */
+      ureg_MAD( ureg,
+                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
+                wpos_input,
+                ureg_scalar(wpostrans, 2),
+                ureg_scalar(wpostrans, 3));
+   }
+
+   /* Use wpos_temp as position input from here on:
+    */
+   t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
+}
+
+
+/**
+ * Emit fragment position/ooordinate code.
+ */
+static void
+emit_wpos(struct st_context *st,
+          struct st_translate *t,
+          const struct gl_program *program,
+          struct ureg_program *ureg)
+{
+   const struct gl_fragment_program *fp =
+      (const struct gl_fragment_program *) program;
+   struct pipe_screen *pscreen = st->pipe->screen;
+   boolean invert = FALSE;
+
+   if (fp->OriginUpperLeft) {
+      /* Fragment shader wants origin in upper-left */
+      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
+         /* the driver supports upper-left origin */
+      }
+      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
+         /* the driver supports lower-left origin, need to invert Y */
+         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
+         invert = TRUE;
+      }
+      else
+         assert(0);
+   }
+   else {
+      /* Fragment shader wants origin in lower-left */
+      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
+         /* the driver supports lower-left origin */
+         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
+      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
+         /* the driver supports upper-left origin, need to invert Y */
+         invert = TRUE;
+      else
+         assert(0);
+   }
+   
+   if (fp->PixelCenterInteger) {
+      /* Fragment shader wants pixel center integer */
+      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER))
+         /* the driver supports pixel center integer */
+         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
+      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER))
+         /* the driver supports pixel center half integer, need to bias X,Y */
+         emit_adjusted_wpos(t, program, 0.5f, invert ? 0.5f : -0.5f);
+      else
+         assert(0);
+   }
+   else {
+      /* Fragment shader wants pixel center half integer */
+      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
+         /* the driver supports pixel center half integer */
+      }
+      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
+         /* the driver supports pixel center integer, need to bias X,Y */
+         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
+         emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f);
+      }
+      else
+         assert(0);
+   }
+
+   /* we invert after adjustment so that we avoid the MOV to temporary,
+    * and reuse the adjustment ADD instead */
+   emit_wpos_inversion(t, program, invert);
+}
+
+/**
+ * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
+ * \param program  the program to translate
+ * \param numInputs  number of input registers used
+ * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
+ *                      input indexes
+ * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
+ * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
+ *                            each input
+ * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
+ * \param numOutputs  number of output registers used
+ * \param outputMapping  maps Mesa fragment program outputs to TGSI
+ *                       generic outputs
+ * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
+ * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
+ *                             each output
+ *
+ * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
+ */
+extern "C" enum pipe_error
+st_translate_program(
+   struct gl_context *ctx,
+   uint procType,
+   struct ureg_program *ureg,
+   glsl_to_tgsi_visitor *program,
+   const struct gl_program *proginfo,
+   GLuint numInputs,
+   const GLuint inputMapping[],
+   const ubyte inputSemanticName[],
+   const ubyte inputSemanticIndex[],
+   const GLuint interpMode[],
+   GLuint numOutputs,
+   const GLuint outputMapping[],
+   const ubyte outputSemanticName[],
+   const ubyte outputSemanticIndex[],
+   boolean passthrough_edgeflags )
+{
+   struct st_translate translate, *t;
+   unsigned i;
+   enum pipe_error ret = PIPE_OK;
+
+   assert(numInputs <= Elements(t->inputs));
+   assert(numOutputs <= Elements(t->outputs));
+
+   t = &translate;
+   memset(t, 0, sizeof *t);
+
+   t->procType = procType;
+   t->inputMapping = inputMapping;
+   t->outputMapping = outputMapping;
+   t->ureg = ureg;
+   t->pointSizeOutIndex = -1;
+   t->prevInstWrotePointSize = GL_FALSE;
+
+   /*_mesa_print_program(program);*/
+
+   /*
+    * Declare input attributes.
+    */
+   if (procType == TGSI_PROCESSOR_FRAGMENT) {
+      for (i = 0; i < numInputs; i++) {
+         t->inputs[i] = ureg_DECL_fs_input(ureg,
+                                           inputSemanticName[i],
+                                           inputSemanticIndex[i],
+                                           interpMode[i]);
+      }
+
+      if (proginfo->InputsRead & FRAG_BIT_WPOS) {
+         /* Must do this after setting up t->inputs, and before
+          * emitting constant references, below:
+          */
+          printf("FRAG_BIT_WPOS\n");
+          emit_wpos(st_context(ctx), t, proginfo, ureg);
+      }
+
+      if (proginfo->InputsRead & FRAG_BIT_FACE) {
+         // TODO: uncomment
+         printf("FRAG_BIT_FACE\n");
+         //emit_face_var( t, program );
+      }
+
+      /*
+       * Declare output attributes.
+       */
+      for (i = 0; i < numOutputs; i++) {
+         switch (outputSemanticName[i]) {
+         case TGSI_SEMANTIC_POSITION:
+            t->outputs[i] = ureg_DECL_output( ureg,
+                                              TGSI_SEMANTIC_POSITION, /* Z / Depth */
+                                              outputSemanticIndex[i] );
+
+            t->outputs[i] = ureg_writemask( t->outputs[i],
+                                            TGSI_WRITEMASK_Z );
+            break;
+         case TGSI_SEMANTIC_STENCIL:
+            t->outputs[i] = ureg_DECL_output( ureg,
+                                              TGSI_SEMANTIC_STENCIL, /* Stencil */
+                                              outputSemanticIndex[i] );
+            t->outputs[i] = ureg_writemask( t->outputs[i],
+                                            TGSI_WRITEMASK_Y );
+            break;
+         case TGSI_SEMANTIC_COLOR:
+            t->outputs[i] = ureg_DECL_output( ureg,
+                                              TGSI_SEMANTIC_COLOR,
+                                              outputSemanticIndex[i] );
+            break;
+         default:
+            debug_assert(0);
+            return PIPE_ERROR_BAD_INPUT;
+         }
+      }
+   }
+   else if (procType == TGSI_PROCESSOR_GEOMETRY) {
+      for (i = 0; i < numInputs; i++) {
+         t->inputs[i] = ureg_DECL_gs_input(ureg,
+                                           i,
+                                           inputSemanticName[i],
+                                           inputSemanticIndex[i]);
+      }
+
+      for (i = 0; i < numOutputs; i++) {
+         t->outputs[i] = ureg_DECL_output( ureg,
+                                           outputSemanticName[i],
+                                           outputSemanticIndex[i] );
+      }
+   }
+   else {
+      assert(procType == TGSI_PROCESSOR_VERTEX);
+
+      for (i = 0; i < numInputs; i++) {
+         t->inputs[i] = ureg_DECL_vs_input(ureg, i);
+      }
+
+      for (i = 0; i < numOutputs; i++) {
+         t->outputs[i] = ureg_DECL_output( ureg,
+                                           outputSemanticName[i],
+                                           outputSemanticIndex[i] );
+         if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) {
+            /* Writing to the point size result register requires special
+             * handling to implement clamping.
+             */
+            static const gl_state_index pointSizeClampState[STATE_LENGTH]
+               = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
+               /* XXX: note we are modifying the incoming shader here!  Need to
+               * do this before emitting the constant decls below, or this
+               * will be missed.
+               * XXX: depends on "Parameters" field specific to Mesa IR
+               */
+            unsigned pointSizeClampConst =
+               _mesa_add_state_reference(proginfo->Parameters,
+                                         pointSizeClampState);
+            struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg );
+            t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst );
+            t->pointSizeResult = t->outputs[i];
+            t->pointSizeOutIndex = i;
+            t->outputs[i] = psizregtemp;
+         }
+      }
+      /*if (passthrough_edgeflags)
+         emit_edgeflags( t, program ); */ // TODO: uncomment
+   }
+
+   /* Declare address register.
+    */
+   if (program->num_address_regs > 0) {
+      debug_assert( program->num_address_regs == 1 );
+      t->address[0] = ureg_DECL_address( ureg );
+   }
+
+   /* Declare misc input registers
+    */
+   {
+      GLbitfield sysInputs = proginfo->SystemValuesRead;
+      unsigned numSys = 0;
+      for (i = 0; sysInputs; i++) {
+         if (sysInputs & (1 << i)) {
+            unsigned semName = mesa_sysval_to_semantic[i];
+            t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
+            numSys++;
+            sysInputs &= ~(1 << i);
+         }
+      }
+   }
+
+   if (program->indirect_addr_temps) {
+      /* If temps are accessed with indirect addressing, declare temporaries
+       * in sequential order.  Else, we declare them on demand elsewhere.
+       * (Note: the number of temporaries is equal to program->next_temp)
+       */
+      for (i = 0; i < (unsigned)program->next_temp; i++) {
+         /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
+         t->temps[i] = ureg_DECL_temporary( t->ureg );
+      }
+   }
+
+   /* Emit constants and immediates.  Mesa uses a single index space
+    * for these, so we put all the translated regs in t->constants.
+    * XXX: this entire if block depends on proginfo->Parameters from Mesa IR
+    */
+   if (proginfo->Parameters) {
+      t->constants = (struct ureg_src *)CALLOC( proginfo->Parameters->NumParameters * sizeof t->constants[0] );
+      if (t->constants == NULL) {
+         ret = PIPE_ERROR_OUT_OF_MEMORY;
+         goto out;
+      }
+
+      for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
+         switch (proginfo->Parameters->Parameters[i].Type) {
+         case PROGRAM_ENV_PARAM:
+         case PROGRAM_LOCAL_PARAM:
+         case PROGRAM_STATE_VAR:
+         case PROGRAM_NAMED_PARAM:
+         case PROGRAM_UNIFORM:
+            t->constants[i] = ureg_DECL_constant( ureg, i );
+            break;
+
+            /* Emit immediates only when there's no indirect addressing of
+             * the const buffer.
+             * FIXME: Be smarter and recognize param arrays:
+             * indirect addressing is only valid within the referenced
+             * array.
+             */
+         case PROGRAM_CONSTANT:
+            if (program->indirect_addr_consts)
+               t->constants[i] = ureg_DECL_constant( ureg, i );
+            else
+               t->constants[i] = 
+                  ureg_DECL_immediate( ureg,
+                                       proginfo->Parameters->ParameterValues[i],
+                                       4 );
+            break;
+         default:
+            break;
+         }
+      }
+   }
+
+   /* texture samplers */
+   for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
+      // XXX: depends on SamplersUsed property generated by conversion to Mesa IR
+      if (proginfo->SamplersUsed & (1 << i)) {
+         t->samplers[i] = ureg_DECL_sampler( ureg, i );
+      }
+   }
+
+   /* Emit each instruction in turn:
+    */
+   foreach_iter(exec_list_iterator, iter, program->instructions) {
+      set_insn_start( t, ureg_get_instruction_number( ureg ));
+      compile_tgsi_instruction( t, (glsl_to_tgsi_instruction *)iter.get() );
+
+      if (t->prevInstWrotePointSize && proginfo->Id) {
+         /* The previous instruction wrote to the (fake) vertex point size
+          * result register.  Now we need to clamp that value to the min/max
+          * point size range, putting the result into the real point size
+          * register.
+          * Note that we can't do this easily at the end of program due to
+          * possible early return.
+          */
+         set_insn_start( t, ureg_get_instruction_number( ureg ));
+         ureg_MAX( t->ureg,
+                   ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
+                   ureg_src(t->outputs[t->pointSizeOutIndex]),
+                   ureg_swizzle(t->pointSizeConst, 1,1,1,1));
+         ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
+                   ureg_src(t->outputs[t->pointSizeOutIndex]),
+                   ureg_swizzle(t->pointSizeConst, 2,2,2,2));
+      }
+      t->prevInstWrotePointSize = GL_FALSE;
+   }
+
+   /* Fix up all emitted labels:
+    */
+   for (i = 0; i < t->labels_count; i++) {
+      ureg_fixup_label( ureg,
+                        t->labels[i].token,
+                        t->insn[t->labels[i].branch_target] );
+   }
+
+out:
+   FREE(t->insn);
+   FREE(t->labels);
+   FREE(t->constants);
+
+   if (t->error) {
+      debug_printf("%s: translate error flag set\n", __FUNCTION__);
+   }
+
+   return ret;
+}
+/* ----------------------------- End TGSI code ------------------------------ */
+
+/**
+ * Convert a shader's GLSL IR into both a Mesa gl_program and a TGSI shader.
+ */
+static struct gl_program *
+get_mesa_program(struct gl_context *ctx,
+                 struct gl_shader_program *shader_program,
+        	 struct gl_shader *shader)
+{
+   glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor();
+   struct prog_instruction *mesa_instructions, *mesa_inst;
+   ir_instruction **mesa_instruction_annotation;
+   int i;
+   struct gl_program *prog;
+   GLenum target;
+   const char *target_string;
+   GLboolean progress;
+   struct gl_shader_compiler_options *options =
+         &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
+
+   switch (shader->Type) {
+   case GL_VERTEX_SHADER:
+      target = GL_VERTEX_PROGRAM_ARB;
+      target_string = "vertex";
+      break;
+   case GL_FRAGMENT_SHADER:
+      target = GL_FRAGMENT_PROGRAM_ARB;
+      target_string = "fragment";
+      break;
+   case GL_GEOMETRY_SHADER:
+      target = GL_GEOMETRY_PROGRAM_NV;
+      target_string = "geometry";
+      break;
+   default:
+      assert(!"should not be reached");
+      return NULL;
+   }
+
+   validate_ir_tree(shader->ir);
+
+   prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
+   if (!prog)
+      return NULL;
+   prog->Parameters = _mesa_new_parameter_list();
+   prog->Varying = _mesa_new_parameter_list();
+   prog->Attributes = _mesa_new_parameter_list();
+   v->ctx = ctx;
+   v->prog = prog;
+   v->shader_program = shader_program;
+   v->options = options;
+
+   add_uniforms_to_parameters_list(shader_program, shader, prog);
+
+   /* Emit Mesa IR for main(). */
+   visit_exec_list(shader->ir, v);
+   v->emit(NULL, OPCODE_END);
+
+   /* Now emit bodies for any functions that were used. */
+   do {
+      progress = GL_FALSE;
+
+      foreach_iter(exec_list_iterator, iter, v->function_signatures) {
+         function_entry *entry = (function_entry *)iter.get();
+
+         if (!entry->bgn_inst) {
+            v->current_function = entry;
+
+            entry->bgn_inst = v->emit(NULL, OPCODE_BGNSUB);
+            entry->bgn_inst->function = entry;
+
+            visit_exec_list(&entry->sig->body, v);
+
+            glsl_to_tgsi_instruction *last;
+            last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
+            if (last->op != OPCODE_RET)
+               v->emit(NULL, OPCODE_RET);
+
+            glsl_to_tgsi_instruction *end;
+            end = v->emit(NULL, OPCODE_ENDSUB);
+            end->function = entry;
+
+            progress = GL_TRUE;
+         }
+      }
+   } while (progress);
+
+#if 0
+   /* Print out some information (for debugging purposes) used by the 
+    * optimization passes. */
+   for (i=0; i < v->next_temp; i++) {
+      int fr = v->get_first_temp_read(i);
+      int fw = v->get_first_temp_write(i);
+      int lr = v->get_last_temp_read(i);
+      int lw = v->get_last_temp_write(i);
+      
+      printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
+      assert(fw <= fr);
+   }
+#endif
+
+   /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
+   v->copy_propagate();
+   v->eliminate_dead_code();
+   v->merge_registers();
+   v->renumber_registers();
+
+   prog->NumTemporaries = v->next_temp;
+
+   int num_instructions = 0;
+   foreach_iter(exec_list_iterator, iter, v->instructions) {
+      num_instructions++;
+   }
+
+   mesa_instructions =
+      (struct prog_instruction *)calloc(num_instructions,
+        				sizeof(*mesa_instructions));
+   mesa_instruction_annotation = ralloc_array(v->mem_ctx, ir_instruction *,
+        				      num_instructions);
+
+   /* Convert glsl_to_tgsi_instructions into Mesa IR prog_instructions.
+    * TODO: remove
+    */
+   mesa_inst = mesa_instructions;
+   i = 0;
+   foreach_iter(exec_list_iterator, iter, v->instructions) {
+      const glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      mesa_inst->Opcode = inst->op;
+      mesa_inst->CondUpdate = inst->cond_update;
+      if (inst->saturate)
+         mesa_inst->SaturateMode = SATURATE_ZERO_ONE;
+      mesa_inst->DstReg.File = inst->dst.file;
+      mesa_inst->DstReg.Index = inst->dst.index;
+      mesa_inst->DstReg.CondMask = inst->dst.cond_mask;
+      mesa_inst->DstReg.WriteMask = inst->dst.writemask;
+      mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL;
+      mesa_inst->SrcReg[0] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[0]);
+      mesa_inst->SrcReg[1] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[1]);
+      mesa_inst->SrcReg[2] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[2]);
+      mesa_inst->TexSrcUnit = inst->sampler;
+      mesa_inst->TexSrcTarget = inst->tex_target;
+      mesa_inst->TexShadow = inst->tex_shadow;
+      mesa_instruction_annotation[i] = inst->ir;
+
+      /* Set IndirectRegisterFiles. */
+      if (mesa_inst->DstReg.RelAddr)
+         prog->IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File;
+
+      /* Update program's bitmask of indirectly accessed register files */
+      for (unsigned src = 0; src < 3; src++)
+         if (mesa_inst->SrcReg[src].RelAddr)
+            prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;
+
+      if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) {
+         fail_link(shader_program, "Couldn't flatten if statement\n");
+      }
+
+      switch (mesa_inst->Opcode) {
+      case OPCODE_BGNSUB:
+         inst->function->inst = i;
+         mesa_inst->Comment = strdup(inst->function->sig->function_name());
+         break;
+      case OPCODE_ENDSUB:
+         mesa_inst->Comment = strdup(inst->function->sig->function_name());
+         break;
+      case OPCODE_CAL:
+         mesa_inst->BranchTarget = inst->function->sig_id; /* rewritten later */
+         break;
+      case OPCODE_ARL:
+         prog->NumAddressRegs = 1;
+         break;
+      default:
+         break;
+      }
+
+      mesa_inst++;
+      i++;
+
+      if (!shader_program->LinkStatus)
+         break;
+   }
+
+   if (!shader_program->LinkStatus) {
+      free(mesa_instructions);
+      _mesa_reference_program(ctx, &shader->Program, NULL);
+      return NULL;
+   }
+
+   set_branchtargets(v, mesa_instructions, num_instructions);
+
+   if (ctx->Shader.Flags & GLSL_DUMP) {
+      printf("\n");
+      printf("GLSL IR for linked %s program %d:\n", target_string,
+             shader_program->Name);
+      _mesa_print_ir(shader->ir, NULL);
+      printf("\n");
+      printf("\n");
+      printf("Mesa IR for linked %s program %d:\n", target_string,
+             shader_program->Name);
+      print_program(mesa_instructions, mesa_instruction_annotation,
+        	    num_instructions);
+   }
+
+   prog->Instructions = mesa_instructions;
+   prog->NumInstructions = num_instructions;
+
+   do_set_program_inouts(shader->ir, prog);
+   count_resources(prog);
+
+   check_resources(ctx, shader_program, prog);
+
+   _mesa_reference_program(ctx, &shader->Program, prog);
+
+   if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) {
+      _mesa_optimize_program(ctx, prog);
+   }
+   
+   struct st_vertex_program *stvp;
+   struct st_fragment_program *stfp;
+   struct st_geometry_program *stgp;
+   
+   switch (shader->Type) {
+   case GL_VERTEX_SHADER:
+      stvp = (struct st_vertex_program *)prog;
+      stvp->glsl_to_tgsi = v;
+      break;
+   case GL_FRAGMENT_SHADER:
+      stfp = (struct st_fragment_program *)prog;
+      stfp->glsl_to_tgsi = v;
+      break;
+   case GL_GEOMETRY_SHADER:
+      stgp = (struct st_geometry_program *)prog;
+      stgp->glsl_to_tgsi = v;
+      break;
+   default:
+      assert(!"should not be reached");
+      return NULL;
+   }
+
+   return prog;
+}
+
+extern "C" {
+
+struct gl_shader *
+st_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
+{
+   struct gl_shader *shader;
+   assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER ||
+          type == GL_GEOMETRY_SHADER_ARB);
+   shader = rzalloc(NULL, struct gl_shader);
+   if (shader) {
+      shader->Type = type;
+      shader->Name = name;
+      _mesa_init_shader(ctx, shader);
+   }
+   return shader;
+}
+
+struct gl_shader_program *
+st_new_shader_program(struct gl_context *ctx, GLuint name)
+{
+   struct gl_shader_program *shProg;
+   shProg = rzalloc(NULL, struct gl_shader_program);
+   if (shProg) {
+      shProg->Name = name;
+      _mesa_init_shader_program(ctx, shProg);
+   }
+   return shProg;
+}
+
+/**
+ * Link a shader.
+ * Called via ctx->Driver.LinkShader()
+ * This actually involves converting GLSL IR into Mesa gl_programs with
+ * code lowering and other optimizations.
+ */
+GLboolean
+st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+   assert(prog->LinkStatus);
+
+   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
+      if (prog->_LinkedShaders[i] == NULL)
+         continue;
+
+      bool progress;
+      exec_list *ir = prog->_LinkedShaders[i]->ir;
+      const struct gl_shader_compiler_options *options =
+            &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
+
+      do {
+         progress = false;
+
+         /* Lowering */
+         do_mat_op_to_vec(ir);
+         lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
+        			 | LOG_TO_LOG2
+        			 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
+
+         progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
+
+         progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
+
+         progress = lower_quadop_vector(ir, true) || progress;
+
+         if (options->EmitNoIfs) {
+            progress = lower_discard(ir) || progress;
+            progress = lower_if_to_cond_assign(ir) || progress;
+         }
+
+         if (options->EmitNoNoise)
+            progress = lower_noise(ir) || progress;
+
+         /* If there are forms of indirect addressing that the driver
+          * cannot handle, perform the lowering pass.
+          */
+         if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
+             || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
+           progress =
+             lower_variable_index_to_cond_assign(ir,
+        					 options->EmitNoIndirectInput,
+        					 options->EmitNoIndirectOutput,
+        					 options->EmitNoIndirectTemp,
+        					 options->EmitNoIndirectUniform)
+             || progress;
+
+         progress = do_vec_index_to_cond_assign(ir) || progress;
+      } while (progress);
+
+      validate_ir_tree(ir);
+   }
+
+   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
+      struct gl_program *linked_prog;
+
+      if (prog->_LinkedShaders[i] == NULL)
+         continue;
+
+      linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
+
+      if (linked_prog) {
+         bool ok = true;
+
+         switch (prog->_LinkedShaders[i]->Type) {
+         case GL_VERTEX_SHADER:
+            _mesa_reference_vertprog(ctx, &prog->VertexProgram,
+                                     (struct gl_vertex_program *)linked_prog);
+            ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
+                                                 linked_prog);
+            break;
+         case GL_FRAGMENT_SHADER:
+            _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
+                                     (struct gl_fragment_program *)linked_prog);
+            ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
+                                                 linked_prog);
+            break;
+         case GL_GEOMETRY_SHADER:
+            _mesa_reference_geomprog(ctx, &prog->GeometryProgram,
+                                     (struct gl_geometry_program *)linked_prog);
+            ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV,
+                                                 linked_prog);
+            break;
+         }
+         if (!ok) {
+            return GL_FALSE;
+         }
+      }
+
+      _mesa_reference_program(ctx, &linked_prog, NULL);
+   }
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Link a GLSL shader program.  Called via glLinkProgram().
+ */
+void
+st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+   unsigned int i;
+
+   _mesa_clear_shader_program_data(ctx, prog);
+
+   prog->LinkStatus = GL_TRUE;
+
+   for (i = 0; i < prog->NumShaders; i++) {
+      if (!prog->Shaders[i]->CompileStatus) {
+         fail_link(prog, "linking with uncompiled shader");
+         prog->LinkStatus = GL_FALSE;
+      }
+   }
+
+   prog->Varying = _mesa_new_parameter_list();
+   _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
+   _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
+   _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL);
+
+   if (prog->LinkStatus) {
+      link_shaders(ctx, prog);
+   }
+
+   if (prog->LinkStatus) {
+      if (!ctx->Driver.LinkShader(ctx, prog)) {
+         prog->LinkStatus = GL_FALSE;
+      }
+   }
+
+   set_uniform_initializers(ctx, prog);
+
+   if (ctx->Shader.Flags & GLSL_DUMP) {
+      if (!prog->LinkStatus) {
+         printf("GLSL shader program %d failed to link\n", prog->Name);
+      }
+
+      if (prog->InfoLog && prog->InfoLog[0] != 0) {
+         printf("GLSL shader program %d info log:\n", prog->Name);
+         printf("%s\n", prog->InfoLog);
+      }
+   }
+}
+
+} /* extern "C" */
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h
new file mode 100644
index 00000000000..e21c0d1e0af
--- /dev/null
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ * Copyright © 2011 Bryan Cain
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "main/glheader.h"
+#include "tgsi/tgsi_ureg.h"
+
+struct gl_context;
+struct gl_shader;
+struct gl_shader_program;
+struct glsl_to_tgsi_visitor;
+
+enum pipe_error st_translate_program(
+   struct gl_context *ctx,
+   uint procType,
+   struct ureg_program *ureg,
+   struct glsl_to_tgsi_visitor *program,
+   const struct gl_program *proginfo,
+   GLuint numInputs,
+   const GLuint inputMapping[],
+   const ubyte inputSemanticName[],
+   const ubyte inputSemanticIndex[],
+   const GLuint interpMode[],
+   GLuint numOutputs,
+   const GLuint outputMapping[],
+   const ubyte outputSemanticName[],
+   const ubyte outputSemanticIndex[],
+   boolean passthrough_edgeflags);
+
+void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v);
+
+struct gl_shader *st_new_shader(struct gl_context *ctx, GLuint name, GLuint type);
+
+struct gl_shader_program *
+st_new_shader_program(struct gl_context *ctx, GLuint name);
+
+void st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog);
+GLboolean st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index a41e5b16a85..75842286ba8 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -267,7 +267,7 @@ src_register( struct st_translate *t,
 /**
  * Map mesa texture target to TGSI texture target.
  */
-static unsigned
+unsigned
 translate_texture_target( GLuint textarget,
                           GLboolean shadow )
 {
@@ -511,7 +511,7 @@ static void emit_ddy( struct st_translate *t,
 
 
 
-static unsigned
+unsigned
 translate_opcode( unsigned op )
 {
    switch( op ) {
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h b/src/mesa/state_tracker/st_mesa_to_tgsi.h
index 0615e52ef62..0dbdf5f6159 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.h
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h
@@ -64,6 +64,12 @@ st_translate_mesa_program(
 void
 st_free_tokens(const struct tgsi_token *tokens);
 
+unsigned
+translate_opcode(unsigned op);
+
+unsigned
+translate_texture_target(GLuint textarget, GLboolean shadow);
+
 
 #if defined __cplusplus
 } /* extern "C" */
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index 7a6d33d3fea..dd618424d66 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -174,8 +174,8 @@ st_release_gp_variants(struct st_context *st, struct st_geometry_program *stgp)
  * \param tokensOut  destination for TGSI tokens
  * \return  pointer to cached pipe_shader object.
  */
-static void
-st_prepare_vertex_program(struct st_context *st,
+void
+st_prepare_vertex_program(struct gl_context *ctx,
                             struct st_vertex_program *stvp)
 {
    GLuint attr;
@@ -184,7 +184,7 @@ st_prepare_vertex_program(struct st_context *st,
    stvp->num_outputs = 0;
 
    if (stvp->Base.IsPositionInvariant)
-      _mesa_insert_mvp_code(st->ctx, &stvp->Base);
+      _mesa_insert_mvp_code(ctx, &stvp->Base);
 
    assert(stvp->Base.Base.NumInstructions > 1);
 
@@ -292,7 +292,7 @@ st_translate_vertex_program(struct st_context *st,
    enum pipe_error error;
    unsigned num_outputs;
 
-   st_prepare_vertex_program( st, stvp );
+   st_prepare_vertex_program(st->ctx, stvp);
 
    _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT);
    _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING);
@@ -318,22 +318,41 @@ st_translate_vertex_program(struct st_context *st,
       debug_printf("\n");
    }
 
-   error = st_translate_mesa_program(st->ctx,
-                                     TGSI_PROCESSOR_VERTEX,
-                                     ureg,
-                                     &stvp->Base.Base,
-                                     /* inputs */
-                                     vpv->num_inputs,
-                                     stvp->input_to_index,
-                                     NULL, /* input semantic name */
-                                     NULL, /* input semantic index */
-                                     NULL,
-                                     /* outputs */
-                                     num_outputs,
-                                     stvp->result_to_output,
-                                     stvp->output_semantic_name,
-                                     stvp->output_semantic_index,
-                                     key->passthrough_edgeflags );
+   if (stvp->glsl_to_tgsi)
+      error = st_translate_program(st->ctx,
+                                   TGSI_PROCESSOR_VERTEX,
+                                   ureg,
+                                   stvp->glsl_to_tgsi,
+                                   &stvp->Base.Base,
+                                   /* inputs */
+                                   stvp->num_inputs,
+                                   stvp->input_to_index,
+                                   NULL, /* input semantic name */
+                                   NULL, /* input semantic index */
+                                   NULL, /* interp mode */
+                                   /* outputs */
+                                   stvp->num_outputs,
+                                   stvp->result_to_output,
+                                   stvp->output_semantic_name,
+                                   stvp->output_semantic_index,
+                                   key->passthrough_edgeflags );
+   else
+      error = st_translate_mesa_program(st->ctx,
+                                        TGSI_PROCESSOR_VERTEX,
+                                        ureg,
+                                        &stvp->Base.Base,
+                                        /* inputs */
+                                        vpv->num_inputs,
+                                        stvp->input_to_index,
+                                        NULL, /* input semantic name */
+                                        NULL, /* input semantic index */
+                                        NULL,
+                                        /* outputs */
+                                        num_outputs,
+                                        stvp->result_to_output,
+                                        stvp->output_semantic_name,
+                                        stvp->output_semantic_index,
+                                        key->passthrough_edgeflags );
 
    if (error)
       goto fail;
@@ -393,6 +412,151 @@ st_get_vp_variant(struct st_context *st,
    return vpv;
 }
 
+/**
+ * Translate Mesa fragment shader attributes to TGSI attributes.
+ * \return GL_TRUE if color output should be written to all render targets, 
+ *         GL_FALSE if not
+ */
+GLboolean
+st_prepare_fragment_program(struct gl_context *ctx,
+                            struct st_fragment_program *stfp)
+{
+   GLuint attr;
+   const GLbitfield inputsRead = stfp->Base.Base.InputsRead;
+   GLboolean write_all = GL_FALSE;
+
+   /*
+    * Convert Mesa program inputs to TGSI input register semantics.
+    */
+   for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) {
+      if (inputsRead & (1 << attr)) {
+         const GLuint slot = stfp->num_inputs++;
+
+         stfp->input_to_index[attr] = slot;
+
+         switch (attr) {
+         case FRAG_ATTRIB_WPOS:
+            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
+            stfp->input_semantic_index[slot] = 0;
+            stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR;
+            break;
+         case FRAG_ATTRIB_COL0:
+            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+            stfp->input_semantic_index[slot] = 0;
+            stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR;
+            break;
+         case FRAG_ATTRIB_COL1:
+            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+            stfp->input_semantic_index[slot] = 1;
+            stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR;
+            break;
+         case FRAG_ATTRIB_FOGC:
+            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
+            stfp->input_semantic_index[slot] = 0;
+            stfp->interp_mode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
+            break;
+         case FRAG_ATTRIB_FACE:
+            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
+            stfp->input_semantic_index[slot] = 0;
+            stfp->interp_mode[slot] = TGSI_INTERPOLATE_CONSTANT;
+            break;
+            /* In most cases, there is nothing special about these
+             * inputs, so adopt a convention to use the generic
+             * semantic name and the mesa FRAG_ATTRIB_ number as the
+             * index. 
+             * 
+             * All that is required is that the vertex shader labels
+             * its own outputs similarly, and that the vertex shader
+             * generates at least every output required by the
+             * fragment shader plus fixed-function hardware (such as
+             * BFC).
+             * 
+             * There is no requirement that semantic indexes start at
+             * zero or be restricted to a particular range -- nobody
+             * should be building tables based on semantic index.
+             */
+         case FRAG_ATTRIB_PNTC:
+         case FRAG_ATTRIB_TEX0:
+         case FRAG_ATTRIB_TEX1:
+         case FRAG_ATTRIB_TEX2:
+         case FRAG_ATTRIB_TEX3:
+         case FRAG_ATTRIB_TEX4:
+         case FRAG_ATTRIB_TEX5:
+         case FRAG_ATTRIB_TEX6:
+         case FRAG_ATTRIB_TEX7:
+         case FRAG_ATTRIB_VAR0:
+         default:
+            /* Actually, let's try and zero-base this just for
+             * readability of the generated TGSI.
+             */
+            assert(attr >= FRAG_ATTRIB_TEX0);
+            stfp->input_semantic_index[slot] = (attr - FRAG_ATTRIB_TEX0);
+            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
+            if (attr == FRAG_ATTRIB_PNTC)
+               stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR;
+            else
+               stfp->interp_mode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
+            break;
+         }
+      }
+      else {
+         stfp->input_to_index[attr] = -1;
+      }
+   }
+
+   /*
+    * Semantics and mapping for outputs
+    */
+   {
+      uint numColors = 0;
+      GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten;
+
+      /* if z is written, emit that first */
+      if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
+         stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_POSITION;
+         stfp->output_semantic_index[stfp->num_outputs] = 0;
+         stfp->result_to_output[FRAG_RESULT_DEPTH] = stfp->num_outputs;
+         stfp->num_outputs++;
+         outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
+      }
+
+      if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
+         stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_STENCIL;
+         stfp->output_semantic_index[stfp->num_outputs] = 0;
+         stfp->result_to_output[FRAG_RESULT_STENCIL] = stfp->num_outputs;
+         stfp->num_outputs++;
+         outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
+      }
+
+      /* handle remaning outputs (color) */
+      for (attr = 0; attr < FRAG_RESULT_MAX; attr++) {
+         if (outputsWritten & BITFIELD64_BIT(attr)) {
+            switch (attr) {
+            case FRAG_RESULT_DEPTH:
+            case FRAG_RESULT_STENCIL:
+               /* handled above */
+               assert(0);
+               break;
+            case FRAG_RESULT_COLOR:
+               write_all = GL_TRUE; /* fallthrough */
+            default:
+               assert(attr == FRAG_RESULT_COLOR ||
+                      (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX));
+               stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_COLOR;
+               stfp->output_semantic_index[stfp->num_outputs] = numColors;
+               stfp->result_to_output[attr] = stfp->num_outputs;
+               numColors++;
+               break;
+            }
+
+            stfp->num_outputs++;
+         }
+      }
+   }
+   
+   return write_all;
+}
+
 
 /**
  * Translate a Mesa fragment shader into a TGSI shader using extra info in
@@ -445,155 +609,12 @@ st_translate_fragment_program(struct st_context *st,
 
    if (!stfp->tgsi.tokens) {
       /* need to translate Mesa instructions to TGSI now */
-      GLuint outputMapping[FRAG_RESULT_MAX];
-      GLuint inputMapping[FRAG_ATTRIB_MAX];
-      GLuint interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
-      GLuint attr;
       enum pipe_error error;
-      const GLbitfield inputsRead = stfp->Base.Base.InputsRead;
       struct ureg_program *ureg;
-      GLboolean write_all = GL_FALSE;
-
-      ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
-      ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
-      uint fs_num_inputs = 0;
-
-      ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
-      ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
-      uint fs_num_outputs = 0;
-
-
+      GLboolean write_all = st_prepare_fragment_program(st->ctx, stfp);
+      
       _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT);
 
-      /*
-       * Convert Mesa program inputs to TGSI input register semantics.
-       */
-      for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) {
-         if (inputsRead & (1 << attr)) {
-            const GLuint slot = fs_num_inputs++;
-
-            inputMapping[attr] = slot;
-
-            switch (attr) {
-            case FRAG_ATTRIB_WPOS:
-               input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
-               input_semantic_index[slot] = 0;
-               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
-               break;
-            case FRAG_ATTRIB_COL0:
-               input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
-               input_semantic_index[slot] = 0;
-               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
-               break;
-            case FRAG_ATTRIB_COL1:
-               input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
-               input_semantic_index[slot] = 1;
-               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
-               break;
-            case FRAG_ATTRIB_FOGC:
-               input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
-               input_semantic_index[slot] = 0;
-               interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
-               break;
-            case FRAG_ATTRIB_FACE:
-               input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
-               input_semantic_index[slot] = 0;
-               interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
-               break;
-               /* In most cases, there is nothing special about these
-                * inputs, so adopt a convention to use the generic
-                * semantic name and the mesa FRAG_ATTRIB_ number as the
-                * index. 
-                * 
-                * All that is required is that the vertex shader labels
-                * its own outputs similarly, and that the vertex shader
-                * generates at least every output required by the
-                * fragment shader plus fixed-function hardware (such as
-                * BFC).
-                * 
-                * There is no requirement that semantic indexes start at
-                * zero or be restricted to a particular range -- nobody
-                * should be building tables based on semantic index.
-                */
-            case FRAG_ATTRIB_PNTC:
-            case FRAG_ATTRIB_TEX0:
-            case FRAG_ATTRIB_TEX1:
-            case FRAG_ATTRIB_TEX2:
-            case FRAG_ATTRIB_TEX3:
-            case FRAG_ATTRIB_TEX4:
-            case FRAG_ATTRIB_TEX5:
-            case FRAG_ATTRIB_TEX6:
-            case FRAG_ATTRIB_TEX7:
-            case FRAG_ATTRIB_VAR0:
-            default:
-               /* Actually, let's try and zero-base this just for
-                * readability of the generated TGSI.
-                */
-               assert(attr >= FRAG_ATTRIB_TEX0);
-               input_semantic_index[slot] = (attr - FRAG_ATTRIB_TEX0);
-               input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
-               if (attr == FRAG_ATTRIB_PNTC)
-                  interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
-               else
-                  interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
-               break;
-            }
-         }
-         else {
-            inputMapping[attr] = -1;
-         }
-      }
-
-      /*
-       * Semantics and mapping for outputs
-       */
-      {
-         uint numColors = 0;
-         GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten;
-
-         /* if z is written, emit that first */
-         if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
-            fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
-            fs_output_semantic_index[fs_num_outputs] = 0;
-            outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
-            fs_num_outputs++;
-            outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
-         }
-
-         if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
-            fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
-            fs_output_semantic_index[fs_num_outputs] = 0;
-            outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
-            fs_num_outputs++;
-            outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
-         }
-
-         /* handle remaning outputs (color) */
-         for (attr = 0; attr < FRAG_RESULT_MAX; attr++) {
-            if (outputsWritten & BITFIELD64_BIT(attr)) {
-               switch (attr) {
-               case FRAG_RESULT_DEPTH:
-               case FRAG_RESULT_STENCIL:
-                  /* handled above */
-                  assert(0);
-                  break;
-               case FRAG_RESULT_COLOR:
-                  write_all = GL_TRUE; /* fallthrough */
-               default:
-                  assert(attr == FRAG_RESULT_COLOR ||
-                         (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX));
-                  fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
-                  fs_output_semantic_index[fs_num_outputs] = numColors;
-                  outputMapping[attr] = fs_num_outputs;
-                  numColors++;
-                  break;
-               }
-
-               fs_num_outputs++;
-            }
-         }
-      }
-
       ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT );
       if (ureg == NULL)
          return NULL;
@@ -606,21 +627,39 @@ st_translate_fragment_program(struct st_context *st,
       if (write_all == GL_TRUE)
          ureg_property_fs_color0_writes_all_cbufs(ureg, 1);
 
-      error = st_translate_mesa_program(st->ctx,
-                                        TGSI_PROCESSOR_FRAGMENT,
-                                        ureg,
-                                        &stfp->Base.Base,
-                                        /* inputs */
-                                        fs_num_inputs,
-                                        inputMapping,
-                                        input_semantic_name,
-                                        input_semantic_index,
-                                        interpMode,
-                                        /* outputs */
-                                        fs_num_outputs,
-                                        outputMapping,
-                                        fs_output_semantic_name,
-                                        fs_output_semantic_index, FALSE );
+      if (stfp->glsl_to_tgsi)
+         error = st_translate_program(st->ctx,
+                                      TGSI_PROCESSOR_FRAGMENT,
+                                      ureg,
+                                      stfp->glsl_to_tgsi,
+                                      &stfp->Base.Base,
+                                      /* inputs */
+                                      stfp->num_inputs,
+                                      stfp->input_to_index,
+                                      stfp->input_semantic_name,
+                                      stfp->input_semantic_index,
+                                      stfp->interp_mode,
+                                      /* outputs */
+                                      stfp->num_outputs,
+                                      stfp->result_to_output,
+                                      stfp->output_semantic_name,
+                                      stfp->output_semantic_index, FALSE );
+      else
+         error = st_translate_mesa_program(st->ctx,
+                                           TGSI_PROCESSOR_FRAGMENT,
+                                           ureg,
+                                           &stfp->Base.Base,
+                                           /* inputs */
+                                           stfp->num_inputs,
+                                           stfp->input_to_index,
+                                           stfp->input_semantic_name,
+                                           stfp->input_semantic_index,
+                                           stfp->interp_mode,
+                                           /* outputs */
+                                           stfp->num_outputs,
+                                           stfp->result_to_output,
+                                           stfp->output_semantic_name,
+                                           stfp->output_semantic_index, FALSE );
 
       stfp->tgsi.tokens = ureg_get_tokens( ureg, NULL );
       ureg_destroy( ureg );
diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
index c4244df939e..67723de6d53 100644
--- a/src/mesa/state_tracker/st_program.h
+++ b/src/mesa/state_tracker/st_program.h
@@ -38,6 +38,7 @@
 #include "program/program.h"
 #include "pipe/p_state.h"
 #include "st_context.h"
+#include "st_glsl_to_tgsi.h"
 
 
 /** Fragment program variant key */
@@ -83,6 +84,22 @@ struct st_fp_variant
 struct st_fragment_program
 {
    struct gl_fragment_program Base;
+   struct glsl_to_tgsi_visitor* glsl_to_tgsi;
+   
+   /** maps a Mesa FRAG_ATTRIB_x to a packed TGSI input index */
+   GLuint input_to_index[FRAG_ATTRIB_MAX];
+   /** maps a TGSI input index back to a Mesa FRAG_ATTRIB_x */
+   GLuint index_to_input[PIPE_MAX_SHADER_INPUTS];
+   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
+   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
+   GLuint num_inputs;
+   GLuint interp_mode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
+
+   /** Maps FRAG_RESULT_x to slot */
+   GLuint result_to_output[FRAG_RESULT_MAX];
+   ubyte output_semantic_name[FRAG_RESULT_MAX];
+   ubyte output_semantic_index[FRAG_RESULT_MAX];
+   GLuint num_outputs;
 
    struct pipe_shader_state tgsi;
 
@@ -136,6 +153,7 @@ struct st_vp_variant
 struct st_vertex_program
 {
    struct gl_vertex_program Base;  /**< The Mesa vertex program */
+   struct glsl_to_tgsi_visitor* glsl_to_tgsi;
 
    /** maps a Mesa VERT_ATTRIB_x to a packed TGSI input index */
    GLuint input_to_index[VERT_ATTRIB_MAX];
@@ -184,6 +202,7 @@ struct st_gp_variant
 struct st_geometry_program
 {
    struct gl_geometry_program Base;  /**< The Mesa geometry program */
+   struct glsl_to_tgsi_visitor* glsl_to_tgsi;
 
    /** map GP input back to VP output */
    GLuint input_map[PIPE_MAX_SHADER_INPUTS];
@@ -276,6 +295,14 @@ st_get_gp_variant(struct st_context *st,
                   const struct st_gp_variant_key *key);
 
 
+extern void
+st_prepare_vertex_program(struct gl_context *ctx,
+                          struct st_vertex_program *stvp);
+
+extern GLboolean
+st_prepare_fragment_program(struct gl_context *ctx,
+                            struct st_fragment_program *stfp);
+
 
 extern void
 st_release_vp_variants( struct st_context *st,

From 1e5fd8e480b661c1ab748c2ded587650ea7f3d20 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Fri, 29 Apr 2011 19:00:24 -0500
Subject: [PATCH 141/600] mesa: fix segfault when no Mesa IR is generated

---
 src/mesa/program/program.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index 78efca9f122..224446a2683 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -388,8 +388,9 @@ _mesa_delete_program(struct gl_context *ctx, struct gl_program *prog)
    if (prog->String)
       free(prog->String);
 
-   _mesa_free_instructions(prog->Instructions, prog->NumInstructions);
-
+   if (prog->Instructions) {
+      _mesa_free_instructions(prog->Instructions, prog->NumInstructions);
+   }
    if (prog->Parameters) {
       _mesa_free_parameter_list(prog->Parameters);
    }

From 44867da3543ca54ef245695cef72a6e305451d93 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Fri, 29 Apr 2011 19:24:57 -0500
Subject: [PATCH 142/600] glsl_to_tgsi: stop generating Mesa IR

Before, it was still generating unused Mesa IR as a remnant of ir_to_mesa, and
depended on some of the information from it.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 290 ++-------------------
 src/mesa/state_tracker/st_program.c        |  13 +-
 2 files changed, 33 insertions(+), 270 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index e1102503ee0..c562abc96c9 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -254,8 +254,9 @@ public:
    struct gl_shader_compiler_options *options;
 
    int next_temp;
-   
+
    int num_address_regs;
+   int samplers_used;
    bool indirect_addr_temps;
    bool indirect_addr_consts;
 
@@ -2310,170 +2311,23 @@ extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
    delete v;
 }
 
-static struct prog_src_register
-mesa_st_src_reg_from_ir_st_src_reg(st_src_reg reg)
-{
-   struct prog_src_register mesa_reg;
-
-   mesa_reg.File = reg.file;
-   assert(reg.index < (1 << INST_INDEX_BITS));
-   mesa_reg.Index = reg.index;
-   mesa_reg.Swizzle = reg.swizzle;
-   mesa_reg.RelAddr = reg.reladdr != NULL;
-   mesa_reg.Negate = reg.negate;
-   mesa_reg.Abs = 0;
-   mesa_reg.HasIndex2 = GL_FALSE;
-   mesa_reg.RelAddr2 = 0;
-   mesa_reg.Index2 = 0;
-
-   return mesa_reg;
-}
-
-static void
-set_branchtargets(glsl_to_tgsi_visitor *v,
-        	  struct prog_instruction *mesa_instructions,
-        	  int num_instructions)
-{
-   int if_count = 0, loop_count = 0;
-   int *if_stack, *loop_stack;
-   int if_stack_pos = 0, loop_stack_pos = 0;
-   int i, j;
-
-   for (i = 0; i < num_instructions; i++) {
-      switch (mesa_instructions[i].Opcode) {
-      case OPCODE_IF:
-         if_count++;
-         break;
-      case OPCODE_BGNLOOP:
-         loop_count++;
-         break;
-      case OPCODE_BRK:
-      case OPCODE_CONT:
-         mesa_instructions[i].BranchTarget = -1;
-         break;
-      default:
-         break;
-      }
-   }
-
-   if_stack = rzalloc_array(v->mem_ctx, int, if_count);
-   loop_stack = rzalloc_array(v->mem_ctx, int, loop_count);
-
-   for (i = 0; i < num_instructions; i++) {
-      switch (mesa_instructions[i].Opcode) {
-      case OPCODE_IF:
-         if_stack[if_stack_pos] = i;
-         if_stack_pos++;
-         break;
-      case OPCODE_ELSE:
-         mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
-         if_stack[if_stack_pos - 1] = i;
-         break;
-      case OPCODE_ENDIF:
-         mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
-         if_stack_pos--;
-         break;
-      case OPCODE_BGNLOOP:
-         loop_stack[loop_stack_pos] = i;
-         loop_stack_pos++;
-         break;
-      case OPCODE_ENDLOOP:
-         loop_stack_pos--;
-         /* Rewrite any breaks/conts at this nesting level (haven't
-          * already had a BranchTarget assigned) to point to the end
-          * of the loop.
-          */
-         for (j = loop_stack[loop_stack_pos]; j < i; j++) {
-            if (mesa_instructions[j].Opcode == OPCODE_BRK ||
-        	mesa_instructions[j].Opcode == OPCODE_CONT) {
-               if (mesa_instructions[j].BranchTarget == -1) {
-        	  mesa_instructions[j].BranchTarget = i;
-               }
-            }
-         }
-         /* The loop ends point at each other. */
-         mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
-         mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
-         break;
-      case OPCODE_CAL:
-         foreach_iter(exec_list_iterator, iter, v->function_signatures) {
-            function_entry *entry = (function_entry *)iter.get();
-
-            if (entry->sig_id == mesa_instructions[i].BranchTarget) {
-               mesa_instructions[i].BranchTarget = entry->inst;
-               break;
-            }
-         }
-         break;
-      default:
-         break;
-      }
-   }
-}
-
-static void
-print_program(struct prog_instruction *mesa_instructions,
-              ir_instruction **mesa_instruction_annotation,
-              int num_instructions)
-{
-   /*ir_instruction *last_ir = NULL;*/
-   int i;
-   int indent = 0;
-
-   for (i = 0; i < num_instructions; i++) {
-      struct prog_instruction *mesa_inst = mesa_instructions + i;
-
-      fprintf(stdout, "%3d: ", i);
-
-#if 0
-/* Disable this for now, since printing GLSL IR along with its corresponding 
- * Mesa IR makes the Mesa IR unreadable. */
-      ir_instruction *ir = mesa_instruction_annotation[i];
-      if (last_ir != ir && ir) {
-         int j;
-
-         for (j = 0; j < indent; j++) {
-            fprintf(stdout, " ");
-         }
-         ir->print();
-         printf("\n");
-         last_ir = ir;
-
-         fprintf(stdout, "     "); /* line number spacing. */
-      }
-#endif
-
-      indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent,
-        				    PROG_PRINT_DEBUG, NULL);
-   }
-}
-
 
 /**
  * Count resources used by the given gpu program (number of texture
  * samplers, etc).
  */
 static void
-count_resources(struct gl_program *prog)
+count_resources(glsl_to_tgsi_visitor *v)
 {
-   unsigned int i;
+   v->samplers_used = 0;
 
-   prog->SamplersUsed = 0;
+   foreach_iter(exec_list_iterator, iter, v->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
 
-   for (i = 0; i < prog->NumInstructions; i++) {
-      struct prog_instruction *inst = &prog->Instructions[i];
-
-      if (_mesa_is_tex_instruction(inst->Opcode)) {
-         prog->SamplerTargets[inst->TexSrcUnit] =
-            (gl_texture_index)inst->TexSrcTarget;
-         prog->SamplersUsed |= 1 << inst->TexSrcUnit;
-         if (inst->TexShadow) {
-            prog->ShadowSamplers |= 1 << inst->TexSrcUnit;
-         }
+      if (_mesa_is_tex_instruction(inst->op)) {
+         v->samplers_used |= 1 << inst->sampler;
       }
    }
-
-   _mesa_update_shader_textures_used(prog);
 }
 
 
@@ -2487,34 +2341,35 @@ count_resources(struct gl_program *prog)
 static void
 check_resources(const struct gl_context *ctx,
                 struct gl_shader_program *shader_program,
-                struct gl_program *prog)
+                glsl_to_tgsi_visitor *prog,
+                struct gl_program *proginfo)
 {
-   switch (prog->Target) {
+   switch (proginfo->Target) {
    case GL_VERTEX_PROGRAM_ARB:
-      if (_mesa_bitcount(prog->SamplersUsed) >
+      if (_mesa_bitcount(prog->samplers_used) >
           ctx->Const.MaxVertexTextureImageUnits) {
          fail_link(shader_program, "Too many vertex shader texture samplers");
       }
-      if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
+      if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
          fail_link(shader_program, "Too many vertex shader constants");
       }
       break;
    case MESA_GEOMETRY_PROGRAM:
-      if (_mesa_bitcount(prog->SamplersUsed) >
+      if (_mesa_bitcount(prog->samplers_used) >
           ctx->Const.MaxGeometryTextureImageUnits) {
          fail_link(shader_program, "Too many geometry shader texture samplers");
       }
-      if (prog->Parameters->NumParameters >
+      if (proginfo->Parameters->NumParameters >
           MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) {
          fail_link(shader_program, "Too many geometry shader constants");
       }
       break;
    case GL_FRAGMENT_PROGRAM_ARB:
-      if (_mesa_bitcount(prog->SamplersUsed) >
+      if (_mesa_bitcount(prog->samplers_used) >
           ctx->Const.MaxTextureImageUnits) {
          fail_link(shader_program, "Too many fragment shader texture samplers");
       }
-      if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
+      if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
          fail_link(shader_program, "Too many fragment shader constants");
       }
       break;
@@ -3767,8 +3622,6 @@ st_translate_program(
    t->pointSizeOutIndex = -1;
    t->prevInstWrotePointSize = GL_FALSE;
 
-   /*_mesa_print_program(program);*/
-
    /*
     * Declare input attributes.
     */
@@ -3952,8 +3805,7 @@ st_translate_program(
 
    /* texture samplers */
    for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
-      // XXX: depends on SamplersUsed property generated by conversion to Mesa IR
-      if (proginfo->SamplersUsed & (1 << i)) {
+      if (program->samplers_used & (1 << i)) {
          t->samplers[i] = ureg_DECL_sampler( ureg, i );
       }
    }
@@ -4006,7 +3858,8 @@ out:
 /* ----------------------------- End TGSI code ------------------------------ */
 
 /**
- * Convert a shader's GLSL IR into both a Mesa gl_program and a TGSI shader.
+ * Convert a shader's GLSL IR into a Mesa gl_program, although without 
+ * generating Mesa IR.
  */
 static struct gl_program *
 get_mesa_program(struct gl_context *ctx,
@@ -4014,9 +3867,6 @@ get_mesa_program(struct gl_context *ctx,
         	 struct gl_shader *shader)
 {
    glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor();
-   struct prog_instruction *mesa_instructions, *mesa_inst;
-   ir_instruction **mesa_instruction_annotation;
-   int i;
    struct gl_program *prog;
    GLenum target;
    const char *target_string;
@@ -4110,90 +3960,6 @@ get_mesa_program(struct gl_context *ctx,
    v->merge_registers();
    v->renumber_registers();
 
-   prog->NumTemporaries = v->next_temp;
-
-   int num_instructions = 0;
-   foreach_iter(exec_list_iterator, iter, v->instructions) {
-      num_instructions++;
-   }
-
-   mesa_instructions =
-      (struct prog_instruction *)calloc(num_instructions,
-        				sizeof(*mesa_instructions));
-   mesa_instruction_annotation = ralloc_array(v->mem_ctx, ir_instruction *,
-        				      num_instructions);
-
-   /* Convert glsl_to_tgsi_instructions into Mesa IR prog_instructions.
-    * TODO: remove
-    */
-   mesa_inst = mesa_instructions;
-   i = 0;
-   foreach_iter(exec_list_iterator, iter, v->instructions) {
-      const glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
-
-      mesa_inst->Opcode = inst->op;
-      mesa_inst->CondUpdate = inst->cond_update;
-      if (inst->saturate)
-         mesa_inst->SaturateMode = SATURATE_ZERO_ONE;
-      mesa_inst->DstReg.File = inst->dst.file;
-      mesa_inst->DstReg.Index = inst->dst.index;
-      mesa_inst->DstReg.CondMask = inst->dst.cond_mask;
-      mesa_inst->DstReg.WriteMask = inst->dst.writemask;
-      mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL;
-      mesa_inst->SrcReg[0] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[0]);
-      mesa_inst->SrcReg[1] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[1]);
-      mesa_inst->SrcReg[2] = mesa_st_src_reg_from_ir_st_src_reg(inst->src[2]);
-      mesa_inst->TexSrcUnit = inst->sampler;
-      mesa_inst->TexSrcTarget = inst->tex_target;
-      mesa_inst->TexShadow = inst->tex_shadow;
-      mesa_instruction_annotation[i] = inst->ir;
-
-      /* Set IndirectRegisterFiles. */
-      if (mesa_inst->DstReg.RelAddr)
-         prog->IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File;
-
-      /* Update program's bitmask of indirectly accessed register files */
-      for (unsigned src = 0; src < 3; src++)
-         if (mesa_inst->SrcReg[src].RelAddr)
-            prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;
-
-      if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) {
-         fail_link(shader_program, "Couldn't flatten if statement\n");
-      }
-
-      switch (mesa_inst->Opcode) {
-      case OPCODE_BGNSUB:
-         inst->function->inst = i;
-         mesa_inst->Comment = strdup(inst->function->sig->function_name());
-         break;
-      case OPCODE_ENDSUB:
-         mesa_inst->Comment = strdup(inst->function->sig->function_name());
-         break;
-      case OPCODE_CAL:
-         mesa_inst->BranchTarget = inst->function->sig_id; /* rewritten later */
-         break;
-      case OPCODE_ARL:
-         prog->NumAddressRegs = 1;
-         break;
-      default:
-         break;
-      }
-
-      mesa_inst++;
-      i++;
-
-      if (!shader_program->LinkStatus)
-         break;
-   }
-
-   if (!shader_program->LinkStatus) {
-      free(mesa_instructions);
-      _mesa_reference_program(ctx, &shader->Program, NULL);
-      return NULL;
-   }
-
-   set_branchtargets(v, mesa_instructions, num_instructions);
-
    if (ctx->Shader.Flags & GLSL_DUMP) {
       printf("\n");
       printf("GLSL IR for linked %s program %d:\n", target_string,
@@ -4201,25 +3967,17 @@ get_mesa_program(struct gl_context *ctx,
       _mesa_print_ir(shader->ir, NULL);
       printf("\n");
       printf("\n");
-      printf("Mesa IR for linked %s program %d:\n", target_string,
-             shader_program->Name);
-      print_program(mesa_instructions, mesa_instruction_annotation,
-        	    num_instructions);
    }
 
-   prog->Instructions = mesa_instructions;
-   prog->NumInstructions = num_instructions;
+   prog->Instructions = NULL;
+   prog->NumInstructions = 0;
 
    do_set_program_inouts(shader->ir, prog);
-   count_resources(prog);
+   count_resources(v);
 
-   check_resources(ctx, shader_program, prog);
+   check_resources(ctx, shader_program, v, prog);
 
    _mesa_reference_program(ctx, &shader->Program, prog);
-
-   if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) {
-      _mesa_optimize_program(ctx, prog);
-   }
    
    struct st_vertex_program *stvp;
    struct st_fragment_program *stfp;
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index dd618424d66..6d395128295 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -186,7 +186,8 @@ st_prepare_vertex_program(struct gl_context *ctx,
    if (stvp->Base.IsPositionInvariant)
       _mesa_insert_mvp_code(ctx, &stvp->Base);
 
-   assert(stvp->Base.Base.NumInstructions > 1);
+   if (!stvp->glsl_to_tgsi)
+      assert(stvp->Base.Base.NumInstructions > 1);
 
    /*
     * Determine number of inputs, the mappings between VERT_ATTRIB_x
@@ -294,8 +295,11 @@ st_translate_vertex_program(struct st_context *st,
 
    st_prepare_vertex_program(st->ctx, stvp);
 
-   _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT);
-   _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING);
+   if (!stvp->glsl_to_tgsi)
+   {
+      _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT);
+      _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING);
+   }
 
    ureg = ureg_create( TGSI_PROCESSOR_VERTEX );
    if (ureg == NULL) {
@@ -613,7 +617,8 @@ st_translate_fragment_program(struct st_context *st,
       struct ureg_program *ureg;
       GLboolean write_all = st_prepare_fragment_program(st->ctx, stfp);
       
-      _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT);
+      if (!stfp->glsl_to_tgsi)
+         _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT);
 
       ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT );
       if (ureg == NULL)

From c341d3cfd0ddbabf6274212b7f0da1a25854a673 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sat, 30 Apr 2011 13:03:33 -0500
Subject: [PATCH 143/600] glsl_to_tgsi: remove reads to output registers

Fixes a regression in 0 A.D. introduced by 809a11c77073e999fd47.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 91 ++++++++++++++++++++++
 1 file changed, 91 insertions(+)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index c562abc96c9..5ea03b4424e 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -100,6 +100,15 @@ public:
       this->reladdr = NULL;
    }
 
+   st_src_reg(gl_register_file file, int index)
+   {
+      this->file = file;
+      this->index = index;
+      this->swizzle = SWIZZLE_XYZW;
+      this->negate = 0;
+      this->reladdr = NULL;
+   }
+
    st_src_reg()
    {
       this->file = PROGRAM_UNDEFINED;
@@ -346,6 +355,8 @@ public:
 
    bool process_move_condition(ir_rvalue *ir);
 
+   void remove_output_reads(gl_register_file type);
+
    void rename_temp_register(int index, int new_index);
    int get_first_temp_read(int index);
    int get_first_temp_write(int index);
@@ -2595,6 +2606,81 @@ set_uniform_initializers(struct gl_context *ctx,
    ralloc_free(mem_ctx);
 }
 
+/*
+ * Scan/rewrite program to remove reads of custom (output) registers.
+ * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING
+ * (for vertex shaders).
+ * In GLSL shaders, varying vars can be read and written.
+ * On some hardware, trying to read an output register causes trouble.
+ * So, rewrite the program to use a temporary register in this case.
+ * 
+ * Based on _mesa_remove_output_reads from programopt.c.
+ */
+void
+glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
+{
+   GLuint i;
+   GLint outputMap[VERT_RESULT_MAX];
+   GLuint numVaryingReads = 0;
+   GLboolean usedTemps[MAX_PROGRAM_TEMPS];
+   GLuint firstTemp = 0;
+
+   _mesa_find_used_registers(prog, PROGRAM_TEMPORARY,
+                             usedTemps, MAX_PROGRAM_TEMPS);
+
+   assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT);
+   assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING);
+
+   for (i = 0; i < VERT_RESULT_MAX; i++)
+      outputMap[i] = -1;
+
+   /* look for instructions which read from varying vars */
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      const GLuint numSrc = _mesa_num_inst_src_regs(inst->op);
+      GLuint j;
+      for (j = 0; j < numSrc; j++) {
+         if (inst->src[j].file == type) {
+            /* replace the read with a temp reg */
+            const GLuint var = inst->src[j].index;
+            if (outputMap[var] == -1) {
+               numVaryingReads++;
+               outputMap[var] = _mesa_find_free_register(usedTemps,
+                                                         MAX_PROGRAM_TEMPS,
+                                                         firstTemp);
+               firstTemp = outputMap[var] + 1;
+            }
+            inst->src[j].file = PROGRAM_TEMPORARY;
+            inst->src[j].index = outputMap[var];
+         }
+      }
+   }
+
+   if (numVaryingReads == 0)
+      return; /* nothing to be done */
+
+   /* look for instructions which write to the varying vars identified above */
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) {
+         /* change inst to write to the temp reg, instead of the varying */
+         inst->dst.file = PROGRAM_TEMPORARY;
+         inst->dst.index = outputMap[inst->dst.index];
+      }
+   }
+   
+   /* insert new MOV instructions at the end */
+   for (i = 0; i < VERT_RESULT_MAX; i++) {
+      if (outputMap[i] >= 0) {
+         /* MOV VAR[i], TEMP[tmp]; */
+         st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i]);
+         st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW);
+         dst.index = i;
+         this->emit(NULL, OPCODE_MOV, dst, src);
+      }
+   }
+}
+
 /* Replaces all references to a temporary register index with another index. */
 void
 glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
@@ -3954,6 +4040,11 @@ get_mesa_program(struct gl_context *ctx,
    }
 #endif
 
+   /* Remove reads to output registers, and to varyings in vertex shaders. */
+   v->remove_output_reads(PROGRAM_OUTPUT);
+   if (target == GL_VERTEX_PROGRAM_ARB)
+      v->remove_output_reads(PROGRAM_VARYING);
+
    /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
    v->copy_propagate();
    v->eliminate_dead_code();

From 556bd82ce1227a568d69dfa0c22841986267d39f Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sat, 30 Apr 2011 13:44:32 -0500
Subject: [PATCH 144/600] glsl_to_tgsi: remove a bad assertion

It was triggered by Alien Arena.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 5ea03b4424e..aa63539e5e8 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3077,11 +3077,11 @@ glsl_to_tgsi_visitor::merge_registers(void)
    
    /* Start looking for registers with non-overlapping usages that can be 
     * merged together. */
-   for (i=0; i < this->next_temp - 1; i++) {
+   for (i=0; i < this->next_temp; i++) {
       /* Don't touch unused registers. */
       if (last_reads[i] < 0 || first_writes[i] < 0) continue;
       
-      for (j=i+1; j < this->next_temp; j++) {
+      for (j=0; j < this->next_temp; j++) {
          /* Don't touch unused registers. */
          if (last_reads[j] < 0 || first_writes[j] < 0) continue;
          
@@ -3089,8 +3089,9 @@ glsl_to_tgsi_visitor::merge_registers(void)
           * in the same instruction as the last read from i.  Note that the 
           * register at index i will always be used earlier or at the same time 
           * as the register at index j. */
-         assert(first_writes[i] <= first_writes[j]);
-         if (last_reads[i] <= first_writes[j]) {
+         if (first_writes[i] <= first_writes[j] && 
+             last_reads[i] <= first_writes[j])
+         {
             rename_temp_register(j, i); /* Replace all references to j with i.*/
             
             /* Update the first_writes and last_reads arrays with the new 

From 5768ed6429937940bd48f5de4f8383273952880a Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sat, 30 Apr 2011 21:17:38 -0500
Subject: [PATCH 145/600] glsl_to_tgsi: define the sampler objects used

Fixes the Nexuiz title screen and the water in 0 A.D.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index aa63539e5e8..5f3f0ba295a 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2328,7 +2328,7 @@ extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
  * samplers, etc).
  */
 static void
-count_resources(glsl_to_tgsi_visitor *v)
+count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
 {
    v->samplers_used = 0;
 
@@ -2337,8 +2337,17 @@ count_resources(glsl_to_tgsi_visitor *v)
 
       if (_mesa_is_tex_instruction(inst->op)) {
          v->samplers_used |= 1 << inst->sampler;
+
+         prog->SamplerTargets[inst->sampler] =
+            (gl_texture_index)inst->tex_target;
+         if (inst->tex_shadow) {
+            prog->ShadowSamplers |= 1 << inst->sampler;
+         }
       }
    }
+   
+   prog->SamplersUsed = v->samplers_used;
+   _mesa_update_shader_textures_used(prog);
 }
 
 
@@ -4065,7 +4074,7 @@ get_mesa_program(struct gl_context *ctx,
    prog->NumInstructions = 0;
 
    do_set_program_inouts(shader->ir, prog);
-   count_resources(v);
+   count_resources(v, prog);
 
    check_resources(ctx, shader_program, v, prog);
 

From a6705aa5ca151278ed1e596b68a327afd1405b9e Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sat, 30 Apr 2011 23:17:11 -0500
Subject: [PATCH 146/600] glsl_to_tgsi: lower noise opcodes when converting
 from GLSL IR, not when generating TGSI

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 5f3f0ba295a..08c6a7b2dd3 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1275,12 +1275,13 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       break;
 
    case ir_unop_noise: {
-      const enum prog_opcode opcode =
-         prog_opcode(OPCODE_NOISE1
-        	     + (ir->operands[0]->type->vector_elements) - 1);
-      assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4));
-
-      emit(ir, opcode, result_dst, op[0]);
+      /* At some point, a motivated person could add a better
+       * implementation of noise.  Currently not even the nvidia
+       * binary drivers do anything more than this.  In any case, the
+       * place to do this is in the GL state tracker, not the poor
+       * driver.
+       */
+      emit(ir, OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
       break;
    }
 
@@ -3484,13 +3485,7 @@ compile_tgsi_instruction(struct st_translate *t,
    case OPCODE_NOISE2:
    case OPCODE_NOISE3:
    case OPCODE_NOISE4:
-      /* At some point, a motivated person could add a better
-       * implementation of noise.  Currently not even the nvidia
-       * binary drivers do anything more than this.  In any case, the
-       * place to do this is in the GL state tracker, not the poor
-       * driver.
-       */
-      ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) );
+      assert(!"OPCODE_NOISE should have been lowered\n");
       break;
 		 
    case OPCODE_DDY:

From 3b0858f1aed83e2d90449f042d625c86ac7b93ed Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sun, 1 May 2011 11:55:03 -0500
Subject: [PATCH 147/600] glsl_to_tgsi: support DDY (ir_unop_dFdy)

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 08c6a7b2dd3..eed9bb0819e 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1271,6 +1271,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       emit(ir, OPCODE_DDX, result_dst, op[0]);
       break;
    case ir_unop_dFdy:
+      op[0].negate = ~op[0].negate;
       emit(ir, OPCODE_DDY, result_dst, op[0]);
       break;
 
@@ -3487,12 +3488,6 @@ compile_tgsi_instruction(struct st_translate *t,
    case OPCODE_NOISE4:
       assert(!"OPCODE_NOISE should have been lowered\n");
       break;
-		 
-   case OPCODE_DDY:
-      // TODO: copy emit_ddy() function from st_mesa_to_tgsi.c
-      assert(!"OPCODE_DDY");
-      //emit_ddy( t, dst[0], &inst->src[0] );
-      break;
 
    default:
       ureg_insn( ureg, 

From 56dc2c176c3ef0d4d5abea54ff4035b062262286 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sun, 1 May 2011 21:49:21 -0500
Subject: [PATCH 148/600] glsl_to_tgsi: use TGSI opcodes when converting from
 GLSL IR

Before, the translator used Mesa IR opcodes (a holdover from ir_to_mesa) and
converted them to TGSI opcodes during TGSI emission.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 550 ++++++++-------------
 1 file changed, 217 insertions(+), 333 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index eed9bb0819e..4cb2f377e98 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -27,7 +27,7 @@
 /**
  * \file glsl_to_tgsi.cpp
  *
- * Translate GLSL IR to Mesa's gl_program representation and to TGSI.
+ * Translate GLSL IR to TGSI.
  */
 
 #include <stdio.h>
@@ -63,11 +63,12 @@ extern "C" {
 #include "pipe/p_state.h"
 #include "util/u_math.h"
 #include "tgsi/tgsi_ureg.h"
-#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_info.h"
 #include "st_context.h"
 #include "st_program.h"
 #include "st_glsl_to_tgsi.h"
 #include "st_mesa_to_tgsi.h"
+}
 
 #define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) |  \
                            (1 << PROGRAM_ENV_PARAM) |    \
@@ -75,7 +76,6 @@ extern "C" {
                            (1 << PROGRAM_NAMED_PARAM) |  \
                            (1 << PROGRAM_CONSTANT) |     \
                            (1 << PROGRAM_UNIFORM))
-}
 
 class st_src_reg;
 class st_dst_reg;
@@ -83,8 +83,7 @@ class st_dst_reg;
 static int swizzle_for_size(int size);
 
 /**
- * This struct is a corresponding struct to Mesa prog_src_register, with
- * wider fields.
+ * This struct is a corresponding struct to TGSI ureg_src.
  */
 class st_src_reg {
 public:
@@ -190,7 +189,7 @@ public:
       return node;
    }
 
-   enum prog_opcode op;
+   unsigned op;
    st_dst_reg dst;
    st_src_reg src[3];
    /** Pointer to the ir source this tree came from for debugging */
@@ -201,7 +200,7 @@ public:
    int tex_target; /**< One of TEXTURE_*_INDEX */
    GLboolean tex_shadow;
 
-   class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */
+   class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
 };
 
 class variable_storage : public exec_node {
@@ -317,15 +316,15 @@ public:
    /** List of glsl_to_tgsi_instruction */
    exec_list instructions;
 
-   glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op);
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op);
 
-   glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op,
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
         		        st_dst_reg dst, st_src_reg src0);
 
-   glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op,
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
         		        st_dst_reg dst, st_src_reg src0, st_src_reg src1);
 
-   glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op,
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
         		        st_dst_reg dst,
         		        st_src_reg src0, st_src_reg src1, st_src_reg src2);
 
@@ -338,13 +337,13 @@ public:
                 st_src_reg src1,
                 unsigned elements);
 
-   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
+   void emit_scalar(ir_instruction *ir, unsigned op,
         	    st_dst_reg dst, st_src_reg src0);
 
-   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
+   void emit_scalar(ir_instruction *ir, unsigned op,
         	    st_dst_reg dst, st_src_reg src0, st_src_reg src1);
 
-   void emit_scs(ir_instruction *ir, enum prog_opcode op,
+   void emit_scs(ir_instruction *ir, unsigned op,
         	 st_dst_reg dst, const st_src_reg &src);
 
    GLboolean try_emit_mad(ir_expression *ir,
@@ -405,8 +404,29 @@ swizzle_for_size(int size)
    return size_swizzles[size - 1];
 }
 
+static bool
+is_tex_instruction(unsigned opcode)
+{
+   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
+   return info->is_tex;
+}
+
+static unsigned
+num_inst_dst_regs(unsigned opcode)
+{
+   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
+   return info->num_dst;
+}
+
+static unsigned
+num_inst_src_regs(unsigned opcode)
+{
+   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
+   return info->is_tex ? info->num_src - 1 : info->num_src;
+}
+
 glsl_to_tgsi_instruction *
-glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
         		 st_dst_reg dst,
         		 st_src_reg src0, st_src_reg src1, st_src_reg src2)
 {
@@ -427,7 +447,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
    reladdr_to_temp(ir, &src0, &num_reladdr);
 
    if (dst.reladdr) {
-      emit(ir, OPCODE_ARL, address_reg, *dst.reladdr);
+      emit(ir, TGSI_OPCODE_ARL, address_reg, *dst.reladdr);
       num_reladdr--;
    }
    assert(num_reladdr == 0);
@@ -441,7 +461,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
 
    inst->function = NULL;
    
-   if (op == OPCODE_ARL)
+   if (op == TGSI_OPCODE_ARL)
       this->num_address_regs = 1;
    
    /* Update indirect addressing status used by TGSI */
@@ -491,14 +511,14 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
 
 
 glsl_to_tgsi_instruction *
-glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
         		 st_dst_reg dst, st_src_reg src0, st_src_reg src1)
 {
    return emit(ir, op, dst, src0, src1, undef_src);
 }
 
 glsl_to_tgsi_instruction *
-glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
         		 st_dst_reg dst, st_src_reg src0)
 {
    assert(dst.writemask != 0);
@@ -506,7 +526,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
 }
 
 glsl_to_tgsi_instruction *
-glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op)
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
 {
    return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
 }
@@ -516,30 +536,30 @@ glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
         		    st_dst_reg dst, st_src_reg src0, st_src_reg src1,
         		    unsigned elements)
 {
-   static const gl_inst_opcode dot_opcodes[] = {
-      OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
+   static const unsigned dot_opcodes[] = {
+      TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
    };
 
    emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
 }
 
 /**
- * Emits Mesa scalar opcodes to produce unique answers across channels.
+ * Emits TGSI scalar opcodes to produce unique answers across channels.
  *
- * Some Mesa opcodes are scalar-only, like ARB_fp/vp.  The src X
+ * Some TGSI opcodes are scalar-only, like ARB_fp/vp.  The src X
  * channel determines the result across all channels.  So to do a vec4
  * of this operation, we want to emit a scalar per source channel used
  * to produce dest channels.
  */
 void
-glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
+glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
         		        st_dst_reg dst,
         			st_src_reg orig_src0, st_src_reg orig_src1)
 {
    int i, j;
    int done_mask = ~dst.writemask;
 
-   /* Mesa RCP is a scalar operation splatting results to all channels,
+   /* TGSI RCP is a scalar operation splatting results to all channels,
     * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
     * dst channels.
     */
@@ -577,7 +597,7 @@ glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
 }
 
 void
-glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
+glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
         		        st_dst_reg dst, st_src_reg src0)
 {
    st_src_reg undef = undef_src;
@@ -588,21 +608,21 @@ glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
 }
 
 /**
- * Emit an OPCODE_SCS instruction
+ * Emit an TGSI_OPCODE_SCS instruction
  *
- * The \c SCS opcode functions a bit differently than the other Mesa (or
- * ARB_fragment_program) opcodes.  Instead of splatting its result across all
- * four components of the destination, it writes one value to the \c x
- * component and another value to the \c y component.
+ * The \c SCS opcode functions a bit differently than the other TGSI opcodes.
+ * Instead of splatting its result across all four components of the 
+ * destination, it writes one value to the \c x component and another value to 
+ * the \c y component.
  *
  * \param ir        IR instruction being processed
- * \param op        Either \c OPCODE_SIN or \c OPCODE_COS depending on which
- *                  value is desired.
+ * \param op        Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending 
+ *                  on which value is desired.
  * \param dst       Destination register
  * \param src       Source register
  */
 void
-glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
+glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
         		     st_dst_reg dst,
         		     const st_src_reg &src)
 {
@@ -613,12 +633,12 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
       return;
    }
 
-   const unsigned component = (op == OPCODE_SIN) ? 0 : 1;
+   const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1;
    const unsigned scs_mask = (1U << component);
    int done_mask = ~dst.writemask;
    st_src_reg tmp;
 
-   assert(op == OPCODE_SIN || op == OPCODE_COS);
+   assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS);
 
    /* If there are compnents in the destination that differ from the component
     * that will be written by the SCS instrution, we'll need a temporary.
@@ -661,7 +681,7 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
 
          /* Emit the SCS instruction.
           */
-         inst = emit(ir, OPCODE_SCS, tmp_dst, src0);
+         inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0);
          inst->dst.writemask = scs_mask;
 
          /* Move the result of the SCS instruction to the desired location in
@@ -669,12 +689,12 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
           */
          tmp.swizzle = MAKE_SWIZZLE4(component, component,
         			     component, component);
-         inst = emit(ir, OPCODE_SCS, dst, tmp);
+         inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp);
          inst->dst.writemask = this_mask;
       } else {
          /* Emit the SCS instruction to write directly to the destination.
           */
-         glsl_to_tgsi_instruction *inst = emit(ir, OPCODE_SCS, dst, src0);
+         glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0);
          inst->dst.writemask = scs_mask;
       }
 
@@ -870,7 +890,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
          } else {
             st_src_reg src(PROGRAM_STATE_VAR, index, NULL);
             src.swizzle = slots[i].swizzle;
-            emit(ir, OPCODE_MOV, dst, src);
+            emit(ir, TGSI_OPCODE_MOV, dst, src);
             /* even a float takes up a whole vec4 reg in a struct/array. */
             dst.index++;
          }
@@ -903,7 +923,7 @@ glsl_to_tgsi_visitor::visit(ir_loop *ir)
       delete a;
    }
 
-   emit(NULL, OPCODE_BGNLOOP);
+   emit(NULL, TGSI_OPCODE_BGNLOOP);
 
    if (ir->to) {
       ir_expression *e =
@@ -936,7 +956,7 @@ glsl_to_tgsi_visitor::visit(ir_loop *ir)
       delete e;
    }
 
-   emit(NULL, OPCODE_ENDLOOP);
+   emit(NULL, TGSI_OPCODE_ENDLOOP);
 }
 
 void
@@ -944,10 +964,10 @@ glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
 {
    switch (ir->mode) {
    case ir_loop_jump::jump_break:
-      emit(NULL, OPCODE_BRK);
+      emit(NULL, TGSI_OPCODE_BRK);
       break;
    case ir_loop_jump::jump_continue:
-      emit(NULL, OPCODE_CONT);
+      emit(NULL, TGSI_OPCODE_CONT);
       break;
    }
 }
@@ -1000,7 +1020,7 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
    c = this->result;
 
    this->result = get_temp(ir->type);
-   emit(ir, OPCODE_MAD, st_dst_reg(this->result), a, b, c);
+   emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, c);
 
    return true;
 }
@@ -1023,7 +1043,7 @@ glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
 
    this->result = get_temp(ir->type);
    glsl_to_tgsi_instruction *inst;
-   inst = emit(ir, OPCODE_MOV, st_dst_reg(this->result), src);
+   inst = emit(ir, TGSI_OPCODE_MOV, st_dst_reg(this->result), src);
    inst->saturate = true;
 
    return true;
@@ -1036,135 +1056,18 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
    if (!reg->reladdr)
       return;
 
-   emit(ir, OPCODE_ARL, address_reg, *reg->reladdr);
+   emit(ir, TGSI_OPCODE_ARL, address_reg, *reg->reladdr);
 
    if (*num_reladdr != 1) {
       st_src_reg temp = get_temp(glsl_type::vec4_type);
 
-      emit(ir, OPCODE_MOV, st_dst_reg(temp), *reg);
+      emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
       *reg = temp;
    }
 
    (*num_reladdr)--;
 }
 
-void
-glsl_to_tgsi_visitor::emit_swz(ir_expression *ir)
-{
-   /* Assume that the vector operator is in a form compatible with OPCODE_SWZ.
-    * This means that each of the operands is either an immediate value of -1,
-    * 0, or 1, or is a component from one source register (possibly with
-    * negation).
-    */
-   uint8_t components[4] = { 0 };
-   bool negate[4] = { false };
-   ir_variable *var = NULL;
-
-   for (unsigned i = 0; i < ir->type->vector_elements; i++) {
-      ir_rvalue *op = ir->operands[i];
-
-      assert(op->type->is_scalar());
-
-      while (op != NULL) {
-         switch (op->ir_type) {
-         case ir_type_constant: {
-
-            assert(op->type->is_scalar());
-
-            const ir_constant *const c = op->as_constant();
-            if (c->is_one()) {
-               components[i] = SWIZZLE_ONE;
-            } else if (c->is_zero()) {
-               components[i] = SWIZZLE_ZERO;
-            } else if (c->is_negative_one()) {
-               components[i] = SWIZZLE_ONE;
-               negate[i] = true;
-            } else {
-               assert(!"SWZ constant must be 0.0 or 1.0.");
-            }
-
-            op = NULL;
-            break;
-         }
-
-         case ir_type_dereference_variable: {
-            ir_dereference_variable *const deref =
-               (ir_dereference_variable *) op;
-
-            assert((var == NULL) || (deref->var == var));
-            components[i] = SWIZZLE_X;
-            var = deref->var;
-            op = NULL;
-            break;
-         }
-
-         case ir_type_expression: {
-            ir_expression *const expr = (ir_expression *) op;
-
-            assert(expr->operation == ir_unop_neg);
-            negate[i] = true;
-
-            op = expr->operands[0];
-            break;
-         }
-
-         case ir_type_swizzle: {
-            ir_swizzle *const swiz = (ir_swizzle *) op;
-
-            components[i] = swiz->mask.x;
-            op = swiz->val;
-            break;
-         }
-
-         default:
-            assert(!"Should not get here.");
-            return;
-         }
-      }
-   }
-
-   assert(var != NULL);
-
-   ir_dereference_variable *const deref =
-      new(mem_ctx) ir_dereference_variable(var);
-
-   this->result.file = PROGRAM_UNDEFINED;
-   deref->accept(this);
-   if (this->result.file == PROGRAM_UNDEFINED) {
-      ir_print_visitor v;
-      printf("Failed to get tree for expression operand:\n");
-      deref->accept(&v);
-      exit(1);
-   }
-
-   st_src_reg src;
-
-   src = this->result;
-   src.swizzle = MAKE_SWIZZLE4(components[0],
-        		       components[1],
-        		       components[2],
-        		       components[3]);
-   src.negate = ((unsigned(negate[0]) << 0)
-        	 | (unsigned(negate[1]) << 1)
-        	 | (unsigned(negate[2]) << 2)
-        	 | (unsigned(negate[3]) << 3));
-
-   /* Storage for our result.  Ideally for an assignment we'd be using the
-    * actual storage for the result here, instead.
-    */
-   const st_src_reg result_src = get_temp(ir->type);
-   st_dst_reg result_dst = st_dst_reg(result_src);
-
-   /* Limit writes to the channels that will be used by result_src later.
-    * This does limit this temp's use as a temporary for multi-instruction
-    * sequences.
-    */
-   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
-
-   emit(ir, OPCODE_SWZ, result_dst, src);
-   this->result = result_src;
-}
-
 void
 glsl_to_tgsi_visitor::visit(ir_expression *ir)
 {
@@ -1173,7 +1076,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
    st_src_reg result_src;
    st_dst_reg result_dst;
 
-   /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
+   /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
     */
    if (ir->operation == ir_binop_add) {
       if (try_emit_mad(ir, 1))
@@ -1184,10 +1087,8 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
    if (try_emit_sat(ir))
       return;
 
-   if (ir->operation == ir_quadop_vector) {
-      this->emit_swz(ir);
-      return;
-   }
+   if (ir->operation == ir_quadop_vector)
+      assert(!"ir_quadop_vector should have been lowered");
 
    for (operand = 0; operand < ir->get_num_operands(); operand++) {
       this->result.file = PROGRAM_UNDEFINED;
@@ -1228,51 +1129,51 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
 
    switch (ir->operation) {
    case ir_unop_logic_not:
-      emit(ir, OPCODE_SEQ, result_dst, op[0], st_src_reg_for_float(0.0));
+      emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_float(0.0));
       break;
    case ir_unop_neg:
       op[0].negate = ~op[0].negate;
       result_src = op[0];
       break;
    case ir_unop_abs:
-      emit(ir, OPCODE_ABS, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
       break;
    case ir_unop_sign:
-      emit(ir, OPCODE_SSG, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
       break;
    case ir_unop_rcp:
-      emit_scalar(ir, OPCODE_RCP, result_dst, op[0]);
+      emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
       break;
 
    case ir_unop_exp2:
-      emit_scalar(ir, OPCODE_EX2, result_dst, op[0]);
+      emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
       break;
    case ir_unop_exp:
    case ir_unop_log:
       assert(!"not reached: should be handled by ir_explog_to_explog2");
       break;
    case ir_unop_log2:
-      emit_scalar(ir, OPCODE_LG2, result_dst, op[0]);
+      emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
       break;
    case ir_unop_sin:
-      emit_scalar(ir, OPCODE_SIN, result_dst, op[0]);
+      emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
       break;
    case ir_unop_cos:
-      emit_scalar(ir, OPCODE_COS, result_dst, op[0]);
+      emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
       break;
    case ir_unop_sin_reduced:
-      emit_scs(ir, OPCODE_SIN, result_dst, op[0]);
+      emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
       break;
    case ir_unop_cos_reduced:
-      emit_scs(ir, OPCODE_COS, result_dst, op[0]);
+      emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
       break;
 
    case ir_unop_dFdx:
-      emit(ir, OPCODE_DDX, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
       break;
    case ir_unop_dFdy:
       op[0].negate = ~op[0].negate;
-      emit(ir, OPCODE_DDY, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]);
       break;
 
    case ir_unop_noise: {
@@ -1282,19 +1183,19 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
        * place to do this is in the GL state tracker, not the poor
        * driver.
        */
-      emit(ir, OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
+      emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
       break;
    }
 
    case ir_binop_add:
-      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
       break;
    case ir_binop_sub:
-      emit(ir, OPCODE_SUB, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
       break;
 
    case ir_binop_mul:
-      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
       break;
    case ir_binop_div:
       assert(!"not reached: should be handled by ir_div_to_mul_rcp");
@@ -1303,33 +1204,33 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       break;
 
    case ir_binop_less:
-      emit(ir, OPCODE_SLT, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
       break;
    case ir_binop_greater:
-      emit(ir, OPCODE_SGT, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]);
       break;
    case ir_binop_lequal:
-      emit(ir, OPCODE_SLE, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]);
       break;
    case ir_binop_gequal:
-      emit(ir, OPCODE_SGE, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
       break;
    case ir_binop_equal:
-      emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
       break;
    case ir_binop_nequal:
-      emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
       break;
    case ir_binop_all_equal:
       /* "==" operator producing a scalar boolean. */
       if (ir->operands[0]->type->is_vector() ||
           ir->operands[1]->type->is_vector()) {
          st_src_reg temp = get_temp(glsl_type::vec4_type);
-         emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+         emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
          emit_dp(ir, result_dst, temp, temp, vector_elements);
-         emit(ir, OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0));
+         emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0));
       } else {
-         emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
+         emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
       }
       break;
    case ir_binop_any_nequal:
@@ -1337,11 +1238,11 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       if (ir->operands[0]->type->is_vector() ||
           ir->operands[1]->type->is_vector()) {
          st_src_reg temp = get_temp(glsl_type::vec4_type);
-         emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+         emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
          emit_dp(ir, result_dst, temp, temp, vector_elements);
-         emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
       } else {
-         emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
+         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
       }
       break;
 
@@ -1349,22 +1250,22 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       assert(ir->operands[0]->type->is_vector());
       emit_dp(ir, result_dst, op[0], op[0],
               ir->operands[0]->type->vector_elements);
-      emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+      emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
       break;
 
    case ir_binop_logic_xor:
-      emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
       break;
 
    case ir_binop_logic_or:
       /* This could be a saturated add and skip the SNE. */
-      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
-      emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+      emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
       break;
 
    case ir_binop_logic_and:
       /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
-      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
       break;
 
    case ir_binop_dot:
@@ -1376,15 +1277,15 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
 
    case ir_unop_sqrt:
       /* sqrt(x) = x * rsq(x). */
-      emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
-      emit(ir, OPCODE_MUL, result_dst, result_src, op[0]);
+      emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
       /* For incoming channels <= 0, set the result to 0. */
       op[0].negate = ~op[0].negate;
-      emit(ir, OPCODE_CMP, result_dst,
+      emit(ir, TGSI_OPCODE_CMP, result_dst,
         		  op[0], result_src, st_src_reg_for_float(0.0));
       break;
    case ir_unop_rsq:
-      emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
+      emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
       break;
    case ir_unop_i2f:
    case ir_unop_b2f:
@@ -1393,36 +1294,36 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       result_src = op[0];
       break;
    case ir_unop_f2i:
-      emit(ir, OPCODE_TRUNC, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
       break;
    case ir_unop_f2b:
    case ir_unop_i2b:
-      emit(ir, OPCODE_SNE, result_dst,
+      emit(ir, TGSI_OPCODE_SNE, result_dst,
         		  op[0], st_src_reg_for_float(0.0));
       break;
    case ir_unop_trunc:
-      emit(ir, OPCODE_TRUNC, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
       break;
    case ir_unop_ceil:
       op[0].negate = ~op[0].negate;
-      emit(ir, OPCODE_FLR, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
       result_src.negate = ~result_src.negate;
       break;
    case ir_unop_floor:
-      emit(ir, OPCODE_FLR, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
       break;
    case ir_unop_fract:
-      emit(ir, OPCODE_FRC, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
       break;
 
    case ir_binop_min:
-      emit(ir, OPCODE_MIN, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
       break;
    case ir_binop_max:
-      emit(ir, OPCODE_MAX, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
       break;
    case ir_binop_pow:
-      emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]);
+      emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
       break;
 
    case ir_unop_bit_not:
@@ -1586,7 +1487,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
       } else {
          index_reg = get_temp(glsl_type::float_type);
 
-         emit(ir, OPCODE_MUL, st_dst_reg(index_reg),
+         emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
               this->result, st_src_reg_for_float(element_size));
       }
 
@@ -1728,9 +1629,9 @@ glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
 
    src_ir->accept(this);
 
-   /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
+   /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
     * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
-    * choose which value OPCODE_CMP produces without an extra instruction
+    * choose which value TGSI_OPCODE_CMP produces without an extra instruction
     * computing the condition.
     */
    if (negate)
@@ -1803,9 +1704,9 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
 
       for (i = 0; i < type_size(ir->lhs->type); i++) {
          if (switch_order) {
-            emit(ir, OPCODE_CMP, l, condition, st_src_reg(l), r);
+            emit(ir, TGSI_OPCODE_CMP, l, condition, st_src_reg(l), r);
          } else {
-            emit(ir, OPCODE_CMP, l, condition, r, st_src_reg(l));
+            emit(ir, TGSI_OPCODE_CMP, l, condition, r, st_src_reg(l));
          }
 
          l.index++;
@@ -1813,7 +1714,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
       }
    } else {
       for (i = 0; i < type_size(ir->lhs->type); i++) {
-         emit(ir, OPCODE_MOV, l, r);
+         emit(ir, TGSI_OPCODE_MOV, l, r);
          l.index++;
          r.index++;
       }
@@ -1849,7 +1750,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
          src = this->result;
 
          for (i = 0; i < (unsigned int)size; i++) {
-            emit(ir, OPCODE_MOV, temp, src);
+            emit(ir, TGSI_OPCODE_MOV, temp, src);
 
             src.index++;
             temp.index++;
@@ -1870,7 +1771,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
          ir->array_elements[i]->accept(this);
          src = this->result;
          for (int j = 0; j < size; j++) {
-            emit(ir, OPCODE_MOV, temp, src);
+            emit(ir, TGSI_OPCODE_MOV, temp, src);
 
             src.index++;
             temp.index++;
@@ -1893,7 +1794,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
         					values,
         					ir->type->vector_elements,
         					&src.swizzle);
-         emit(ir, OPCODE_MOV, mat_column, src);
+         emit(ir, TGSI_OPCODE_MOV, mat_column, src);
 
          mat_column.index++;
       }
@@ -2005,7 +1906,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
          l.cond_mask = COND_TR;
 
          for (i = 0; i < type_size(param->type); i++) {
-            emit(ir, OPCODE_MOV, l, r);
+            emit(ir, TGSI_OPCODE_MOV, l, r);
             l.index++;
             r.index++;
          }
@@ -2016,7 +1917,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
    assert(!sig_iter.has_next());
 
    /* Emit call instruction */
-   call_inst = emit(ir, OPCODE_CAL);
+   call_inst = emit(ir, TGSI_OPCODE_CAL);
    call_inst->function = entry;
 
    /* Process out parameters. */
@@ -2041,7 +1942,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
          st_dst_reg l = st_dst_reg(this->result);
 
          for (i = 0; i < type_size(param->type); i++) {
-            emit(ir, OPCODE_MOV, l, r);
+            emit(ir, TGSI_OPCODE_MOV, l, r);
             l.index++;
             r.index++;
          }
@@ -2061,7 +1962,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
    st_src_reg result_src, coord, lod_info, projector, dx, dy;
    st_dst_reg result_dst, coord_dst;
    glsl_to_tgsi_instruction *inst = NULL;
-   prog_opcode opcode = OPCODE_NOP;
+   unsigned opcode = TGSI_OPCODE_NOP;
 
    ir->coordinate->accept(this);
 
@@ -2072,7 +1973,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
     */
    coord = get_temp(glsl_type::vec4_type);
    coord_dst = st_dst_reg(coord);
-   emit(ir, OPCODE_MOV, coord_dst, this->result);
+   emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
 
    if (ir->projector) {
       ir->projector->accept(this);
@@ -2087,20 +1988,20 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
 
    switch (ir->op) {
    case ir_tex:
-      opcode = OPCODE_TEX;
+      opcode = TGSI_OPCODE_TEX;
       break;
    case ir_txb:
-      opcode = OPCODE_TXB;
+      opcode = TGSI_OPCODE_TXB;
       ir->lod_info.bias->accept(this);
       lod_info = this->result;
       break;
    case ir_txl:
-      opcode = OPCODE_TXL;
+      opcode = TGSI_OPCODE_TXL;
       ir->lod_info.lod->accept(this);
       lod_info = this->result;
       break;
    case ir_txd:
-      opcode = OPCODE_TXD;
+      opcode = TGSI_OPCODE_TXD;
       ir->lod_info.grad.dPdx->accept(this);
       dx = this->result;
       ir->lod_info.grad.dPdy->accept(this);
@@ -2112,25 +2013,25 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
    }
 
    if (ir->projector) {
-      if (opcode == OPCODE_TEX) {
+      if (opcode == TGSI_OPCODE_TEX) {
          /* Slot the projector in as the last component of the coord. */
          coord_dst.writemask = WRITEMASK_W;
-         emit(ir, OPCODE_MOV, coord_dst, projector);
+         emit(ir, TGSI_OPCODE_MOV, coord_dst, projector);
          coord_dst.writemask = WRITEMASK_XYZW;
-         opcode = OPCODE_TXP;
+         opcode = TGSI_OPCODE_TXP;
       } else {
          st_src_reg coord_w = coord;
          coord_w.swizzle = SWIZZLE_WWWW;
 
          /* For the other TEX opcodes there's no projective version
-          * since the last slot is taken up by lod info.  Do the
+          * since the last slot is taken up by LOD info.  Do the
           * projective divide now.
           */
          coord_dst.writemask = WRITEMASK_W;
-         emit(ir, OPCODE_RCP, coord_dst, projector);
+         emit(ir, TGSI_OPCODE_RCP, coord_dst, projector);
 
          /* In the case where we have to project the coordinates "by hand,"
-          * the shadow comparitor value must also be projected.
+          * the shadow comparator value must also be projected.
           */
          st_src_reg tmp_src = coord;
          if (ir->shadow_comparitor) {
@@ -2143,42 +2044,42 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
             st_dst_reg tmp_dst = st_dst_reg(tmp_src);
 
             tmp_dst.writemask = WRITEMASK_Z;
-            emit(ir, OPCODE_MOV, tmp_dst, this->result);
+            emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
 
             tmp_dst.writemask = WRITEMASK_XY;
-            emit(ir, OPCODE_MOV, tmp_dst, coord);
+            emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
          }
 
          coord_dst.writemask = WRITEMASK_XYZ;
-         emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w);
+         emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);
 
          coord_dst.writemask = WRITEMASK_XYZW;
          coord.swizzle = SWIZZLE_XYZW;
       }
    }
 
-   /* If projection is done and the opcode is not OPCODE_TXP, then the shadow
-    * comparitor was put in the correct place (and projected) by the code,
+   /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
+    * comparator was put in the correct place (and projected) by the code,
     * above, that handles by-hand projection.
     */
-   if (ir->shadow_comparitor && (!ir->projector || opcode == OPCODE_TXP)) {
+   if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
       /* Slot the shadow value in as the second to last component of the
        * coord.
        */
       ir->shadow_comparitor->accept(this);
       coord_dst.writemask = WRITEMASK_Z;
-      emit(ir, OPCODE_MOV, coord_dst, this->result);
+      emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
       coord_dst.writemask = WRITEMASK_XYZW;
    }
 
-   if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) {
-      /* Mesa IR stores lod or lod bias in the last channel of the coords. */
+   if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB) {
+      /* TGSI stores LOD or LOD bias in the last channel of the coords. */
       coord_dst.writemask = WRITEMASK_W;
-      emit(ir, OPCODE_MOV, coord_dst, lod_info);
+      emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
       coord_dst.writemask = WRITEMASK_XYZW;
    }
 
-   if (opcode == OPCODE_TXD)
+   if (opcode == TGSI_OPCODE_TXD)
       inst = emit(ir, opcode, result_dst, coord, dx, dy);
    else
       inst = emit(ir, opcode, result_dst, coord);
@@ -2235,13 +2136,13 @@ glsl_to_tgsi_visitor::visit(ir_return *ir)
       l = st_dst_reg(current_function->return_reg);
 
       for (i = 0; i < type_size(current_function->sig->return_type); i++) {
-         emit(ir, OPCODE_MOV, l, r);
+         emit(ir, TGSI_OPCODE_MOV, l, r);
          l.index++;
          r.index++;
       }
    }
 
-   emit(ir, OPCODE_RET);
+   emit(ir, TGSI_OPCODE_RET);
 }
 
 void
@@ -2252,9 +2153,9 @@ glsl_to_tgsi_visitor::visit(ir_discard *ir)
    if (ir->condition) {
       ir->condition->accept(this);
       this->result.negate = ~this->result.negate;
-      emit(ir, OPCODE_KIL, undef_dst, this->result);
+      emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result);
    } else {
-      emit(ir, OPCODE_KIL_NV);
+      emit(ir, TGSI_OPCODE_KILP);
    }
 
    fp->UsesKill = GL_TRUE;
@@ -2280,14 +2181,14 @@ glsl_to_tgsi_visitor::visit(ir_if *ir)
        */
       if (cond_inst == prev_inst) {
          st_src_reg temp = get_temp(glsl_type::bool_type);
-         cond_inst = emit(ir->condition, OPCODE_MOV, st_dst_reg(temp), result);
+         cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result);
       }
       cond_inst->cond_update = GL_TRUE;
 
-      if_inst = emit(ir->condition, OPCODE_IF);
+      if_inst = emit(ir->condition, TGSI_OPCODE_IF);
       if_inst->dst.cond_mask = COND_NE;
    } else {
-      if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result);
+      if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result);
    }
 
    this->instructions.push_tail(if_inst);
@@ -2295,11 +2196,11 @@ glsl_to_tgsi_visitor::visit(ir_if *ir)
    visit_exec_list(&ir->then_instructions, this);
 
    if (!ir->else_instructions.is_empty()) {
-      else_inst = emit(ir->condition, OPCODE_ELSE);
+      else_inst = emit(ir->condition, TGSI_OPCODE_ELSE);
       visit_exec_list(&ir->else_instructions, this);
    }
 
-   if_inst = emit(ir->condition, OPCODE_ENDIF);
+   if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF);
 }
 
 glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
@@ -2337,7 +2238,7 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
    foreach_iter(exec_list_iterator, iter, v->instructions) {
       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
 
-      if (_mesa_is_tex_instruction(inst->op)) {
+      if (is_tex_instruction(inst->op)) {
          v->samplers_used |= 1 << inst->sampler;
 
          prog->SamplerTargets[inst->sampler] =
@@ -2648,7 +2549,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
    /* look for instructions which read from varying vars */
    foreach_iter(exec_list_iterator, iter, this->instructions) {
       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
-      const GLuint numSrc = _mesa_num_inst_src_regs(inst->op);
+      const GLuint numSrc = num_inst_src_regs(inst->op);
       GLuint j;
       for (j = 0; j < numSrc; j++) {
          if (inst->src[j].file == type) {
@@ -2687,7 +2588,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
          st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i]);
          st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW);
          dst.index = i;
-         this->emit(NULL, OPCODE_MOV, dst, src);
+         this->emit(NULL, TGSI_OPCODE_MOV, dst, src);
       }
    }
 }
@@ -2700,7 +2601,7 @@ glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
       unsigned j;
       
-      for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) {
+      for (j=0; j < num_inst_src_regs(inst->op); j++) {
          if (inst->src[j].file == PROGRAM_TEMPORARY && 
              inst->src[j].index == index) {
             inst->src[j].index = new_index;
@@ -2723,17 +2624,17 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index)
    foreach_iter(exec_list_iterator, iter, this->instructions) {
       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
       
-      for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) {
+      for (j=0; j < num_inst_src_regs(inst->op); j++) {
          if (inst->src[j].file == PROGRAM_TEMPORARY && 
              inst->src[j].index == index) {
             return (depth == 0) ? i : loop_start;
          }
       }
       
-      if (inst->op == OPCODE_BGNLOOP) {
+      if (inst->op == TGSI_OPCODE_BGNLOOP) {
          if(depth++ == 0)
             loop_start = i;
-      } else if (inst->op == OPCODE_ENDLOOP) {
+      } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
          if (--depth == 0)
             loop_start = -1;
       }
@@ -2759,10 +2660,10 @@ glsl_to_tgsi_visitor::get_first_temp_write(int index)
          return (depth == 0) ? i : loop_start;
       }
       
-      if (inst->op == OPCODE_BGNLOOP) {
+      if (inst->op == TGSI_OPCODE_BGNLOOP) {
          if(depth++ == 0)
             loop_start = i;
-      } else if (inst->op == OPCODE_ENDLOOP) {
+      } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
          if (--depth == 0)
             loop_start = -1;
       }
@@ -2784,16 +2685,16 @@ glsl_to_tgsi_visitor::get_last_temp_read(int index)
    foreach_iter(exec_list_iterator, iter, this->instructions) {
       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
       
-      for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) {
+      for (j=0; j < num_inst_src_regs(inst->op); j++) {
          if (inst->src[j].file == PROGRAM_TEMPORARY && 
              inst->src[j].index == index) {
             last = (depth == 0) ? i : -2;
          }
       }
       
-      if (inst->op == OPCODE_BGNLOOP)
+      if (inst->op == TGSI_OPCODE_BGNLOOP)
          depth++;
-      else if (inst->op == OPCODE_ENDLOOP)
+      else if (inst->op == TGSI_OPCODE_ENDLOOP)
          if (--depth == 0 && last == -2)
             last = i;
       assert(depth >= 0);
@@ -2818,9 +2719,9 @@ glsl_to_tgsi_visitor::get_last_temp_write(int index)
       if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index)
          last = (depth == 0) ? i : -2;
       
-      if (inst->op == OPCODE_BGNLOOP)
+      if (inst->op == TGSI_OPCODE_BGNLOOP)
          depth++;
-      else if (inst->op == OPCODE_ENDLOOP)
+      else if (inst->op == TGSI_OPCODE_ENDLOOP)
          if (--depth == 0 && last == -2)
             last = i;
       assert(depth >= 0);
@@ -2922,18 +2823,18 @@ glsl_to_tgsi_visitor::copy_propagate(void)
       }
 
       switch (inst->op) {
-      case OPCODE_BGNLOOP:
-      case OPCODE_ENDLOOP:
+      case TGSI_OPCODE_BGNLOOP:
+      case TGSI_OPCODE_ENDLOOP:
          /* End of a basic block, clear the ACP entirely. */
          memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
          break;
 
-      case OPCODE_IF:
+      case TGSI_OPCODE_IF:
          ++level;
          break;
 
-      case OPCODE_ENDIF:
-      case OPCODE_ELSE:
+      case TGSI_OPCODE_ENDIF:
+      case TGSI_OPCODE_ELSE:
          /* Clear all channels written inside the block from the ACP, but
           * leaving those that were not touched.
           */
@@ -2946,7 +2847,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
         	  acp[4 * r + c] = NULL;
             }
          }
-         if (inst->op == OPCODE_ENDIF)
+         if (inst->op == TGSI_OPCODE_ENDIF)
             --level;
          break;
 
@@ -3005,7 +2906,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
       }
 
       /* If this is a copy, add it to the ACP. */
-      if (inst->op == OPCODE_MOV &&
+      if (inst->op == TGSI_OPCODE_MOV &&
           inst->dst.file == PROGRAM_TEMPORARY &&
           !inst->dst.reladdr &&
           !inst->saturate &&
@@ -3337,11 +3238,11 @@ src_register( struct st_translate *t,
 }
 
 /**
- * Create a TGSI ureg_dst register from a Mesa dest register.
+ * Create a TGSI ureg_dst register from an st_dst_reg.
  */
 static struct ureg_dst
 translate_dst( struct st_translate *t,
-               const st_dst_reg *dst_reg, //const struct prog_dst_register *DstReg,
+               const st_dst_reg *dst_reg,
                boolean saturate )
 {
    struct ureg_dst dst = dst_register( t, 
@@ -3361,7 +3262,7 @@ translate_dst( struct st_translate *t,
 }
 
 /**
- * Create a TGSI ureg_src register from a Mesa src register.
+ * Create a TGSI ureg_src register from an st_src_reg.
  */
 static struct ureg_src
 translate_src( struct st_translate *t,
@@ -3378,12 +3279,6 @@ translate_src( struct st_translate *t,
    if ((src_reg->negate & 0xf) == NEGATE_XYZW)
       src = ureg_negate(src);
 
-#if 0
-   // src_reg currently does not have an equivalent to SrcReg->Abs in Mesa IR
-   if (src_reg->abs) 
-      src = ureg_abs(src);
-#endif
-
    if (src_reg->reladdr != NULL) {
       /* Normally ureg_src_indirect() would be used here, but a stupid compiler 
        * bug in g++ makes ureg_src_indirect (an inline C function) erroneously 
@@ -3421,77 +3316,64 @@ compile_tgsi_instruction(struct st_translate *t,
    unsigned num_dst;
    unsigned num_src;
 
-   num_dst = _mesa_num_inst_dst_regs( inst->op );
-   num_src = _mesa_num_inst_src_regs( inst->op );
+   num_dst = num_inst_dst_regs( inst->op );
+   num_src = num_inst_src_regs( inst->op );
 
    if (num_dst) 
       dst[0] = translate_dst( t, 
                               &inst->dst,
-                              inst->saturate); // inst->SaturateMode
+                              inst->saturate);
 
    for (i = 0; i < num_src; i++) 
       src[i] = translate_src( t, &inst->src[i] );
 
    switch( inst->op ) {
-   case OPCODE_SWZ:
-      // TODO: copy emit_swz function from st_mesa_to_tgsi.c
-      //emit_swz( t, dst[0], &inst->src[0] );
-      assert(!"OPCODE_SWZ");
-      return;
-
-   case OPCODE_BGNLOOP:
-   case OPCODE_CAL:
-   case OPCODE_ELSE:
-   case OPCODE_ENDLOOP:
-   case OPCODE_IF:
+   case TGSI_OPCODE_BGNLOOP:
+   case TGSI_OPCODE_CAL:
+   case TGSI_OPCODE_ELSE:
+   case TGSI_OPCODE_ENDLOOP:
+   case TGSI_OPCODE_IF:
       debug_assert(num_dst == 0);
       ureg_label_insn( ureg,
-                       translate_opcode( inst->op ),
+                       inst->op,
                        src, num_src,
                        get_label( t, 
-                                  inst->op == OPCODE_CAL ? inst->function->sig_id : 0 ));
+                                  inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0 ));
       return;
 
-   case OPCODE_TEX:
-   case OPCODE_TXB:
-   case OPCODE_TXD:
-   case OPCODE_TXL:
-   case OPCODE_TXP:
+   case TGSI_OPCODE_TEX:
+   case TGSI_OPCODE_TXB:
+   case TGSI_OPCODE_TXD:
+   case TGSI_OPCODE_TXL:
+   case TGSI_OPCODE_TXP:
       src[num_src++] = t->samplers[inst->sampler];
       ureg_tex_insn( ureg,
-                     translate_opcode( inst->op ),
+                     inst->op,
                      dst, num_dst, 
                      translate_texture_target( inst->tex_target,
                                                inst->tex_shadow ),
                      src, num_src );
       return;
 
-   case OPCODE_SCS:
+   case TGSI_OPCODE_SCS:
       dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
       ureg_insn( ureg, 
-                 translate_opcode( inst->op ), 
+                 inst->op, 
                  dst, num_dst, 
                  src, num_src );
       break;
 
-   case OPCODE_XPD:
+   case TGSI_OPCODE_XPD:
       dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
       ureg_insn( ureg, 
-                 translate_opcode( inst->op ), 
+                 inst->op, 
                  dst, num_dst, 
                  src, num_src );
       break;
 
-   case OPCODE_NOISE1:
-   case OPCODE_NOISE2:
-   case OPCODE_NOISE3:
-   case OPCODE_NOISE4:
-      assert(!"OPCODE_NOISE should have been lowered\n");
-      break;
-
    default:
       ureg_insn( ureg, 
-                 translate_opcode( inst->op ), 
+                 inst->op, 
                  dst, num_dst, 
                  src, num_src );
       break;
@@ -3993,9 +3875,8 @@ get_mesa_program(struct gl_context *ctx,
 
    add_uniforms_to_parameters_list(shader_program, shader, prog);
 
-   /* Emit Mesa IR for main(). */
+   /* Emit intermediate IR for main(). */
    visit_exec_list(shader->ir, v);
-   v->emit(NULL, OPCODE_END);
 
    /* Now emit bodies for any functions that were used. */
    do {
@@ -4007,18 +3888,18 @@ get_mesa_program(struct gl_context *ctx,
          if (!entry->bgn_inst) {
             v->current_function = entry;
 
-            entry->bgn_inst = v->emit(NULL, OPCODE_BGNSUB);
+            entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB);
             entry->bgn_inst->function = entry;
 
             visit_exec_list(&entry->sig->body, v);
 
             glsl_to_tgsi_instruction *last;
             last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
-            if (last->op != OPCODE_RET)
-               v->emit(NULL, OPCODE_RET);
+            if (last->op != TGSI_OPCODE_RET)
+               v->emit(NULL, TGSI_OPCODE_RET);
 
             glsl_to_tgsi_instruction *end;
-            end = v->emit(NULL, OPCODE_ENDSUB);
+            end = v->emit(NULL, TGSI_OPCODE_ENDSUB);
             end->function = entry;
 
             progress = GL_TRUE;
@@ -4050,6 +3931,9 @@ get_mesa_program(struct gl_context *ctx,
    v->eliminate_dead_code();
    v->merge_registers();
    v->renumber_registers();
+   
+   /* Write the END instruction. */
+   v->emit(NULL, TGSI_OPCODE_END);
 
    if (ctx->Shader.Flags & GLSL_DUMP) {
       printf("\n");
@@ -4127,8 +4011,8 @@ st_new_shader_program(struct gl_context *ctx, GLuint name)
 /**
  * Link a shader.
  * Called via ctx->Driver.LinkShader()
- * This actually involves converting GLSL IR into Mesa gl_programs with
- * code lowering and other optimizations.
+ * This actually involves converting GLSL IR into an intermediate TGSI-like IR 
+ * with code lowering and other optimizations.
  */
 GLboolean
 st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)

From 16d7a717d592524e4d62fec4173cb9523f7a1453 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Mon, 2 May 2011 23:12:18 -0500
Subject: [PATCH 149/600] glsl_to_tgsi: fix shaders with indirect addressing of
 temps

Fixes several Piglit tests, although it's a step backwards for optimization.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 4cb2f377e98..75ab9c5de7c 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -485,7 +485,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
    else {
       for (i=0; i<3; i++) {
          if(inst->src[i].reladdr) {
-            switch(dst.file) {
+            switch(inst->src[i].file) {
             case PROGRAM_TEMPORARY:
                this->indirect_addr_temps = true;
                break;
@@ -3928,9 +3928,17 @@ get_mesa_program(struct gl_context *ctx,
 
    /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
    v->copy_propagate();
-   v->eliminate_dead_code();
-   v->merge_registers();
-   v->renumber_registers();
+   
+   /* FIXME: These passes to optimize temporary registers don't work when there
+    * is indirect addressing of the temporary register space.  We need proper 
+    * array support so that we don't have to give up these passes in every 
+    * shader that uses arrays.
+    */
+   if (!v->indirect_addr_temps) {
+      v->merge_registers();
+      v->eliminate_dead_code();
+      v->renumber_registers();
+   }
    
    /* Write the END instruction. */
    v->emit(NULL, TGSI_OPCODE_END);

From 17b695e6e7dd730497fb60a8e161935b23fa0e9c Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Thu, 5 May 2011 21:10:28 -0500
Subject: [PATCH 150/600] gallium: add PIPE_SHADER_CAP_INTEGERS

---
 src/gallium/auxiliary/tgsi/tgsi_exec.h | 2 ++
 src/gallium/drivers/i915/i915_screen.c | 2 ++
 src/gallium/drivers/i965/brw_screen.c  | 2 ++
 src/gallium/drivers/nv50/nv50_screen.c | 2 ++
 src/gallium/drivers/nvc0/nvc0_screen.c | 2 ++
 src/gallium/drivers/nvfx/nvfx_screen.c | 2 ++
 src/gallium/drivers/r300/r300_screen.c | 2 ++
 src/gallium/drivers/r600/r600_pipe.c   | 2 ++
 src/gallium/drivers/svga/svga_screen.c | 2 ++
 src/gallium/include/pipe/p_defines.h   | 1 +
 10 files changed, 19 insertions(+)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 33f33aa82c7..6c32ccff323 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -400,6 +400,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
       return 1;
    case PIPE_SHADER_CAP_SUBROUTINES:
       return 1;
+   case PIPE_SHADER_CAP_INTEGERS:
+      return 1;
    default:
       return 0;
    }
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index c86baa58b28..5b3af2519fc 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -222,6 +222,8 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha
          return 1;
       case PIPE_SHADER_CAP_SUBROUTINES:
          return 0;
+      case PIPE_SHADER_CAP_INTEGERS:
+         return 0;
       default:
          debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap);
          return 0;
diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c
index 9178dfa8f69..39e9e2fa6ac 100644
--- a/src/gallium/drivers/i965/brw_screen.c
+++ b/src/gallium/drivers/i965/brw_screen.c
@@ -243,6 +243,8 @@ brw_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shad
           return 1;
       case PIPE_SHADER_CAP_SUBROUTINES:
           return 1;
+      case PIPE_SHADER_CAP_INTEGERS:
+         return 0;
       default:
          assert(0);
          return 0;
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index cc921d08666..7e436fd47d8 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -180,6 +180,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
       return 1;
    case PIPE_SHADER_CAP_SUBROUTINES:
       return 0; /* please inline, or provide function declarations */
+   case PIPE_SHADER_CAP_INTEGERS:
+      return 0;
    default:
       NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
       return 0;
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
index 34bf0f0a2ad..52143981500 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -167,6 +167,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
       return 1;
    case PIPE_SHADER_CAP_SUBROUTINES:
       return 0; /* please inline, or provide function declarations */
+   case PIPE_SHADER_CAP_INTEGERS:
+      return 0;
    default:
       NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
       return 0;
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c
index 475138c3c32..d880b12fcaa 100644
--- a/src/gallium/drivers/nvfx/nvfx_screen.c
+++ b/src/gallium/drivers/nvfx/nvfx_screen.c
@@ -174,6 +174,8 @@ nvfx_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, enum
 			return 1;
 		case PIPE_SHADER_CAP_SUBROUTINES:
 			return 1;
+		case PIPE_SHADER_CAP_INTEGERS:
+			return 0;
 		default:
 			break;
 		}
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index fae03acb6d1..93baba68150 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -256,6 +256,8 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
             return 1;
         case PIPE_SHADER_CAP_SUBROUTINES:
             return 0;
+        case PIPE_SHADER_CAP_INTEGERS:
+            return 0;
         default:
             break;
         }
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 16fe6c54a15..2d744137522 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -504,6 +504,8 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
 		return 1;
 	case PIPE_SHADER_CAP_SUBROUTINES:
 		return 0;
+	case PIPE_SHADER_CAP_INTEGERS:
+		return 0;
 	default:
 		return 0;
 	}
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index b847cf331b3..4be10ef5821 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -286,6 +286,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
          return 1;
       case PIPE_SHADER_CAP_SUBROUTINES:
          return 0;
+      case PIPE_SHADER_CAP_INTEGERS:
+         return 0;
       default:
          break;
       }
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index c0c2a7c7fd2..2c95c204e5b 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -491,6 +491,7 @@ enum pipe_shader_cap
    PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR = 14,
    PIPE_SHADER_CAP_INDIRECT_CONST_ADDR = 15,
    PIPE_SHADER_CAP_SUBROUTINES = 16, /* BGNSUB, ENDSUB, CAL, RET */
+   PIPE_SHADER_CAP_INTEGERS = 17,
 };
 
 

From 6d89abadbcd68bbe9e08f041412549f8dc1fc73c Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Tue, 17 May 2011 17:13:20 -0500
Subject: [PATCH 151/600] mesa: support boolean and integer-based parameters in
 prog_parameter

The functionality is not used by anything yet, and the glUniform functions will
need to be reworked before this can reach its full usefulness.  It is
nonetheless a step towards integer support in the state tracker and classic drivers.
---
 src/mesa/main/ff_fragment_shader.cpp       |  3 +-
 src/mesa/main/ffvertex_prog.c              | 10 ++---
 src/mesa/main/uniforms.c                   | 12 +++---
 src/mesa/program/ir_to_mesa.cpp            |  8 ++--
 src/mesa/program/nvfragparse.c             | 23 ++++++----
 src/mesa/program/prog_execute.c            |  2 +-
 src/mesa/program/prog_parameter.c          | 50 +++++++++++-----------
 src/mesa/program/prog_parameter.h          | 25 +++++++----
 src/mesa/program/prog_parameter_layout.c   |  2 +-
 src/mesa/program/prog_print.c              |  2 +-
 src/mesa/program/program.c                 |  3 +-
 src/mesa/program/sampler.cpp               |  2 +-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 12 +++---
 13 files changed, 88 insertions(+), 66 deletions(-)

diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp
index 0b53c28f7ae..2ccbaf8f8c3 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -875,7 +875,8 @@ static struct ureg register_const4f( struct texenv_fragment_program *p,
    values[1] = s1;
    values[2] = s2;
    values[3] = s3;
-   idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
+   idx = _mesa_add_unnamed_constant( p->program->Base.Parameters,
+                                     (gl_constant_value *) values, 4,
                                      &swizzle );
    r = make_ureg(PROGRAM_CONSTANT, idx);
    r.swz = swizzle;
diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c
index b8e49a3757f..2d2485c9e06 100644
--- a/src/mesa/main/ffvertex_prog.c
+++ b/src/mesa/main/ffvertex_prog.c
@@ -455,13 +455,13 @@ static struct ureg register_const4f( struct tnl_program *p,
 			      GLfloat s2,
 			      GLfloat s3)
 {
-   GLfloat values[4];
+   gl_constant_value values[4];
    GLint idx;
    GLuint swizzle;
-   values[0] = s0;
-   values[1] = s1;
-   values[2] = s2;
-   values[3] = s3;
+   values[0].f = s0;
+   values[1].f = s1;
+   values[2].f = s2;
+   values[3].f = s3;
    idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
                                      &swizzle );
    ASSERT(swizzle == SWIZZLE_NOOP);
diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
index 1c4fd82baac..07d46c6404f 100644
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -429,7 +429,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
             for (i = 0; i < rows; i++) {
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
-                  params[k++] = prog->Parameters->ParameterValues[base][j];
+                  params[k++] = prog->Parameters->ParameterValues[base][j].f;
                }
             }
          }
@@ -442,7 +442,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
                   params[k++] = (GLdouble)
-                     prog->Parameters->ParameterValues[base][j];
+                     prog->Parameters->ParameterValues[base][j].f;
                }
             }
          }
@@ -455,7 +455,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
                   params[k++] = (GLint)
-                     prog->Parameters->ParameterValues[base][j];
+                     prog->Parameters->ParameterValues[base][j].f;
                }
             }
          }
@@ -468,7 +468,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
                   params[k++] = (GLuint)
-                     prog->Parameters->ParameterValues[base][j];
+                     prog->Parameters->ParameterValues[base][j].f;
                }
             }
          }
@@ -670,7 +670,7 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program,
       /* loop over number of samplers to change */
       for (i = 0; i < count; i++) {
          GLuint sampler = (GLuint)
-            program->Parameters->ParameterValues[index + offset + i][0];
+            program->Parameters->ParameterValues[index+offset + i][0].f;
          GLuint texUnit = ((GLuint *) values)[i];
 
          /* check that the sampler (tex unit index) is legal */
@@ -936,7 +936,7 @@ set_program_uniform_matrix(struct gl_context *ctx, struct gl_program *program,
             /* Ignore writes beyond the end of (the used part of) an array */
             return;
          }
-         v = program->Parameters->ParameterValues[index + offset];
+         v = (GLfloat *) program->Parameters->ParameterValues[index + offset];
          for (row = 0; row < rows; row++) {
             if (transpose) {
                v[row] = values[src + row * cols + col];
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 00869979dd8..f27492749bd 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -599,7 +599,7 @@ ir_to_mesa_visitor::src_reg_for_float(float val)
    src_reg src(PROGRAM_CONSTANT, -1, NULL);
 
    src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-					  &val, 1, &src.swizzle);
+					  (const gl_constant_value *)&val, 1, &src.swizzle);
 
    return src;
 }
@@ -1798,7 +1798,7 @@ ir_to_mesa_visitor::visit(ir_constant *ir)
 
 	 src = src_reg(PROGRAM_CONSTANT, -1, NULL);
 	 src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-						values,
+						(gl_constant_value *) values,
 						ir->type->vector_elements,
 						&src.swizzle);
 	 emit(ir, OPCODE_MOV, mat_column, src);
@@ -1836,7 +1836,7 @@ ir_to_mesa_visitor::visit(ir_constant *ir)
 
    this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type);
    this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-						   values,
+						   (gl_constant_value *) values,
 						   ir->type->vector_elements,
 						   &this->result.swizzle);
 }
@@ -2533,7 +2533,7 @@ add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
 	  */
 	 if (file == PROGRAM_SAMPLER) {
 	    for (unsigned int j = 0; j < size / 4; j++)
-	       prog->Parameters->ParameterValues[index + j][0] = next_sampler++;
+	       prog->Parameters->ParameterValues[index + j][0].f = next_sampler++;
 	 }
 
 	 /* The location chosen in the Parameters list here (returned
diff --git a/src/mesa/program/nvfragparse.c b/src/mesa/program/nvfragparse.c
index 8516b5fc1ff..ce72c610d89 100644
--- a/src/mesa/program/nvfragparse.c
+++ b/src/mesa/program/nvfragparse.c
@@ -472,8 +472,9 @@ Parse_ScalarConstant(struct parse_state *parseState, GLfloat *number)
       const GLfloat *constant;
       if (!Parse_Identifier(parseState, ident))
          RETURN_ERROR1("Expected an identifier");
-      constant = _mesa_lookup_parameter_value(parseState->parameters,
-                                              -1, (const char *) ident);
+      constant = (GLfloat *)_mesa_lookup_parameter_value(parseState->parameters,
+                                                         -1, 
+                                                         (const char *) ident);
       /* XXX Check that it's a constant and not a parameter */
       if (!constant) {
          RETURN_ERROR1("Undefined symbol");
@@ -1039,7 +1040,8 @@ Parse_VectorSrc(struct parse_state *parseState,
       if (!Parse_ScalarConstant(parseState, values))
          RETURN_ERROR;
       paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
-                                              values, 4, NULL);
+                                              (gl_constant_value *) values,
+                                              4, NULL);
       srcReg->File = PROGRAM_NAMED_PARAM;
       srcReg->Index = paramIndex;
    }
@@ -1051,7 +1053,8 @@ Parse_VectorSrc(struct parse_state *parseState,
       if (!Parse_VectorConstant(parseState, values))
          RETURN_ERROR;
       paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
-                                              values, 4, NULL);
+                                              (gl_constant_value *) values,
+                                              4, NULL);
       srcReg->File = PROGRAM_NAMED_PARAM;
       srcReg->Index = paramIndex;      
    }
@@ -1145,7 +1148,8 @@ Parse_ScalarSrcReg(struct parse_state *parseState,
       if (!Parse_VectorConstant(parseState, values))
          RETURN_ERROR;
       paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
-                                              values, 4, NULL);
+                                              (gl_constant_value *) values,
+                                              4, NULL);
       srcReg->File = PROGRAM_NAMED_PARAM;
       srcReg->Index = paramIndex;      
    }
@@ -1170,7 +1174,8 @@ Parse_ScalarSrcReg(struct parse_state *parseState,
       if (!Parse_ScalarConstant(parseState, values))
          RETURN_ERROR;
       paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
-                                              values, 4, NULL);
+                                              (gl_constant_value *) values,
+                                              4, NULL);
       srcReg->Index = paramIndex;      
       srcReg->File = PROGRAM_NAMED_PARAM;
       needSuffix = GL_FALSE;
@@ -1296,7 +1301,8 @@ Parse_InstructionSequence(struct parse_state *parseState,
             RETURN_ERROR2(id, "already defined");
          }
          _mesa_add_named_parameter(parseState->parameters,
-                                   (const char *) id, value);
+                                   (const char *) id,
+                                   (gl_constant_value *) value);
       }
       else if (Parse_String(parseState, "DECLARE")) {
          GLubyte id[100];
@@ -1315,7 +1321,8 @@ Parse_InstructionSequence(struct parse_state *parseState,
             RETURN_ERROR2(id, "already declared");
          }
          _mesa_add_named_parameter(parseState->parameters,
-                                   (const char *) id, value);
+                                   (const char *) id,
+                                   (gl_constant_value *) value);
       }
       else if (Parse_String(parseState, "END")) {
          inst->Opcode = OPCODE_END;
diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c
index e7553c69dbe..dbfd1b91875 100644
--- a/src/mesa/program/prog_execute.c
+++ b/src/mesa/program/prog_execute.c
@@ -157,7 +157,7 @@ get_src_register_pointer(const struct prog_src_register *source,
    case PROGRAM_NAMED_PARAM:
       if (reg >= (GLint) prog->Parameters->NumParameters)
          return ZeroVec;
-      return prog->Parameters->ParameterValues[reg];
+      return (GLfloat *) prog->Parameters->ParameterValues[reg];
 
    case PROGRAM_SYSTEM_VALUE:
       assert(reg < Elements(machine->SystemValues));
diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c
index 3570cab118b..b1cdf8bf2c0 100644
--- a/src/mesa/program/prog_parameter.c
+++ b/src/mesa/program/prog_parameter.c
@@ -56,8 +56,8 @@ _mesa_new_parameter_list_sized(unsigned size)
       p->Parameters = (struct gl_program_parameter *)
 	 calloc(1, size * sizeof(struct gl_program_parameter));
 
-      p->ParameterValues = (GLfloat (*)[4])
-         _mesa_align_malloc(size * 4 *sizeof(GLfloat), 16);
+      p->ParameterValues = (gl_constant_value (*)[4])
+         _mesa_align_malloc(size * 4 *sizeof(gl_constant_value), 16);
 
 
       if ((p->Parameters == NULL) || (p->ParameterValues == NULL)) {
@@ -101,14 +101,15 @@ _mesa_free_parameter_list(struct gl_program_parameter_list *paramList)
  * \param name  the parameter name, will be duplicated/copied!
  * \param size  number of elements in 'values' vector (1..4, or more)
  * \param datatype  GL_FLOAT, GL_FLOAT_VECx, GL_INT, GL_INT_VECx or GL_NONE.
- * \param values  initial parameter value, up to 4 GLfloats, or NULL
+ * \param values  initial parameter value, up to 4 gl_constant_values, or NULL
  * \param state  state indexes, or NULL
  * \return  index of new parameter in the list, or -1 if error (out of mem)
  */
 GLint
 _mesa_add_parameter(struct gl_program_parameter_list *paramList,
                     gl_register_file type, const char *name,
-                    GLuint size, GLenum datatype, const GLfloat *values,
+                    GLuint size, GLenum datatype,
+                    const gl_constant_value *values,
                     const gl_state_index state[STATE_LENGTH],
                     GLbitfield flags)
 {
@@ -127,10 +128,10 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
 		       oldNum * sizeof(struct gl_program_parameter),
 		       paramList->Size * sizeof(struct gl_program_parameter));
 
-      paramList->ParameterValues = (GLfloat (*)[4])
+      paramList->ParameterValues = (gl_constant_value (*)[4])
          _mesa_align_realloc(paramList->ParameterValues,         /* old buf */
-                             oldNum * 4 * sizeof(GLfloat),      /* old size */
-                             paramList->Size * 4 *sizeof(GLfloat), /* new sz */
+                             oldNum * 4 * sizeof(gl_constant_value),/* old sz */
+                             paramList->Size*4*sizeof(gl_constant_value),/*new*/
                              16);
    }
 
@@ -142,7 +143,7 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
       return -1;
    }
    else {
-      GLuint i;
+      GLuint i, j;
 
       paramList->NumParameters = oldNum + sz4;
 
@@ -163,7 +164,8 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
          }
          else {
             /* silence valgrind */
-            ASSIGN_4V(paramList->ParameterValues[oldNum + i], 0, 0, 0, 0);
+            for (j = 0; j < 4; j++)
+            	paramList->ParameterValues[oldNum + i][j].f = 0;
          }
          size -= 4;
       }
@@ -184,7 +186,7 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
  */
 GLint
 _mesa_add_named_parameter(struct gl_program_parameter_list *paramList,
-                          const char *name, const GLfloat values[4])
+                          const char *name, const gl_constant_value values[4])
 {
    return _mesa_add_parameter(paramList, PROGRAM_NAMED_PARAM, name,
                               4, GL_NONE, values, NULL, 0x0);
@@ -204,17 +206,17 @@ _mesa_add_named_parameter(struct gl_program_parameter_list *paramList,
  */
 GLint
 _mesa_add_named_constant(struct gl_program_parameter_list *paramList,
-                         const char *name, const GLfloat values[4],
+                         const char *name, const gl_constant_value values[4],
                          GLuint size)
 {
    /* first check if this is a duplicate constant */
    GLint pos;
    for (pos = 0; pos < (GLint)paramList->NumParameters; pos++) {
-      const GLfloat *pvals = paramList->ParameterValues[pos];
-      if (pvals[0] == values[0] &&
-          pvals[1] == values[1] &&
-          pvals[2] == values[2] &&
-          pvals[3] == values[3] &&
+      const gl_constant_value *pvals = paramList->ParameterValues[pos];
+      if (pvals[0].u == values[0].u &&
+          pvals[1].u == values[1].u &&
+          pvals[2].u == values[2].u &&
+          pvals[3].u == values[3].u &&
           strcmp(paramList->Parameters[pos].Name, name) == 0) {
          /* Same name and value is already in the param list - reuse it */
          return pos;
@@ -240,7 +242,7 @@ _mesa_add_named_constant(struct gl_program_parameter_list *paramList,
  */
 GLint
 _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
-                           const GLfloat values[4], GLuint size,
+                           const gl_constant_value values[4], GLuint size,
                            GLuint *swizzleOut)
 {
    GLint pos;
@@ -262,7 +264,7 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
          struct gl_program_parameter *p = paramList->Parameters + pos;
          if (p->Type == PROGRAM_CONSTANT && p->Size + size <= 4) {
             /* ok, found room */
-            GLfloat *pVal = paramList->ParameterValues[pos];
+            gl_constant_value *pVal = paramList->ParameterValues[pos];
             GLuint swz = p->Size; /* 1, 2 or 3 for Y, Z, W */
             pVal[p->Size] = values[0];
             p->Size++;
@@ -401,7 +403,7 @@ _mesa_add_state_reference(struct gl_program_parameter_list *paramList,
  * Lookup a parameter value by name in the given parameter list.
  * \return pointer to the float[4] values.
  */
-GLfloat *
+gl_constant_value *
 _mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList,
                              GLsizei nameLen, const char *name)
 {
@@ -465,7 +467,7 @@ _mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList,
  */
 GLboolean
 _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
-                                const GLfloat v[], GLuint vSize,
+                                const gl_constant_value v[], GLuint vSize,
                                 GLint *posOut, GLuint *swizzleOut)
 {
    GLuint i;
@@ -484,7 +486,7 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
             /* swizzle not allowed */
             GLuint j, match = 0;
             for (j = 0; j < vSize; j++) {
-               if (v[j] == list->ParameterValues[i][j])
+               if (v[j].u == list->ParameterValues[i][j].u)
                   match++;
             }
             if (match == vSize) {
@@ -498,7 +500,7 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
                 /* look for v[0] anywhere within float[4] value */
                 GLuint j;
                 for (j = 0; j < list->Parameters[i].Size; j++) {
-                   if (list->ParameterValues[i][j] == v[0]) {
+                   if (list->ParameterValues[i][j].u == v[0].u) {
                       /* found it */
                       *posOut = i;
                       *swizzleOut = MAKE_SWIZZLE4(j, j, j, j);
@@ -511,13 +513,13 @@ _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
                 GLuint swz[4];
                 GLuint match = 0, j, k;
                 for (j = 0; j < vSize; j++) {
-                   if (v[j] == list->ParameterValues[i][j]) {
+                   if (v[j].u == list->ParameterValues[i][j].u) {
                       swz[j] = j;
                       match++;
                    }
                    else {
                       for (k = 0; k < list->Parameters[i].Size; k++) {
-                         if (v[j] == list->ParameterValues[i][k]) {
+                         if (v[j].u == list->ParameterValues[i][k].u) {
                             swz[j] = k;
                             match++;
                             break;
diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h
index 10cbbe57a6c..dcc171ed745 100644
--- a/src/mesa/program/prog_parameter.h
+++ b/src/mesa/program/prog_parameter.h
@@ -46,7 +46,15 @@
 #define PROG_PARAM_BIT_CYL_WRAP  0x10  /**< XXX gallium debug */
 /*@}*/
 
-
+/**
+ * Actual data for constant values of parameters.
+ */
+typedef union gl_constant_value {
+	GLfloat f;
+	GLboolean b;
+	GLint i;
+	GLuint u;
+} gl_constant_value;
 
 /**
  * Program parameter.
@@ -81,7 +89,7 @@ struct gl_program_parameter_list
    GLuint Size;           /**< allocated size of Parameters, ParameterValues */
    GLuint NumParameters;  /**< number of parameters in arrays */
    struct gl_program_parameter *Parameters; /**< Array [Size] */
-   GLfloat (*ParameterValues)[4];        /**< Array [Size] of GLfloat[4] */
+   gl_constant_value (*ParameterValues)[4]; /**< Array [Size] of constant[4] */
    GLbitfield StateFlags; /**< _NEW_* flags indicating which state changes
                                might invalidate ParameterValues[] */
 };
@@ -112,22 +120,23 @@ _mesa_num_parameters(const struct gl_program_parameter_list *list)
 extern GLint
 _mesa_add_parameter(struct gl_program_parameter_list *paramList,
                     gl_register_file type, const char *name,
-                    GLuint size, GLenum datatype, const GLfloat *values,
+                    GLuint size, GLenum datatype,
+                    const gl_constant_value *values,
                     const gl_state_index state[STATE_LENGTH],
                     GLbitfield flags);
 
 extern GLint
 _mesa_add_named_parameter(struct gl_program_parameter_list *paramList,
-                          const char *name, const GLfloat values[4]);
+                          const char *name, const gl_constant_value values[4]);
 
 extern GLint
 _mesa_add_named_constant(struct gl_program_parameter_list *paramList,
-                         const char *name, const GLfloat values[4],
+                         const char *name, const gl_constant_value values[4],
                          GLuint size);
 
 extern GLint
 _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
-                           const GLfloat values[4], GLuint size,
+                           const gl_constant_value values[4], GLuint size,
                            GLuint *swizzleOut);
 
 extern GLint
@@ -143,7 +152,7 @@ extern GLint
 _mesa_add_state_reference(struct gl_program_parameter_list *paramList,
                           const gl_state_index stateTokens[STATE_LENGTH]);
 
-extern GLfloat *
+extern gl_constant_value *
 _mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList,
                              GLsizei nameLen, const char *name);
 
@@ -153,7 +162,7 @@ _mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList,
 
 extern GLboolean
 _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
-                                const GLfloat v[], GLuint vSize,
+                                const gl_constant_value v[], GLuint vSize,
                                 GLint *posOut, GLuint *swizzleOut);
 
 extern GLuint
diff --git a/src/mesa/program/prog_parameter_layout.c b/src/mesa/program/prog_parameter_layout.c
index 90a9771080c..28fca3b92d9 100644
--- a/src/mesa/program/prog_parameter_layout.c
+++ b/src/mesa/program/prog_parameter_layout.c
@@ -182,7 +182,7 @@ _mesa_layout_parameters(struct asm_parser_state *state)
 
 	 switch (p->Type) {
 	 case PROGRAM_CONSTANT: {
-	    const float *const v =
+	    const gl_constant_value *const v =
 	       state->prog->Parameters->ParameterValues[idx];
 
 	    inst->Base.SrcReg[i].Index =
diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c
index 7c3b4909e73..70412b1fa6a 100644
--- a/src/mesa/program/prog_print.c
+++ b/src/mesa/program/prog_print.c
@@ -985,7 +985,7 @@ _mesa_fprint_parameter_list(FILE *f,
    fprintf(f, "dirty state flags: 0x%x\n", list->StateFlags);
    for (i = 0; i < list->NumParameters; i++){
       struct gl_program_parameter *param = list->Parameters + i;
-      const GLfloat *v = list->ParameterValues[i];
+      const GLfloat *v = (GLfloat *) list->ParameterValues[i];
       fprintf(f, "param[%d] sz=%d %s %s = {%.3g, %.3g, %.3g, %.3g}",
 	      i, param->Size,
 	      _mesa_register_file_name(list->Parameters[i].Type),
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index 224446a2683..4f2b6270501 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -1030,7 +1030,8 @@ _mesa_postprocess_program(struct gl_context *ctx, struct gl_program *prog)
    GLuint i;
    GLuint whiteSwizzle;
    GLint whiteIndex = _mesa_add_unnamed_constant(prog->Parameters,
-                                                 white, 4, &whiteSwizzle);
+                                                 (gl_constant_value *) white,
+                                                 4, &whiteSwizzle);
 
    (void) whiteIndex;
 
diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp
index 1457d1199fa..e8d34c670a9 100644
--- a/src/mesa/program/sampler.cpp
+++ b/src/mesa/program/sampler.cpp
@@ -132,6 +132,6 @@ _mesa_get_sampler_uniform_value(class ir_dereference *sampler,
 
    index += getname.offset;
 
-   return prog->Parameters->ParameterValues[index][0];
+   return prog->Parameters->ParameterValues[index][0].f;
 }
 }
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 75ab9c5de7c..881b9e05de1 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -706,9 +706,11 @@ struct st_src_reg
 glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
 {
    st_src_reg src(PROGRAM_CONSTANT, -1, NULL);
+   union gl_constant_value uval;
 
+   uval.f = val;
    src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-        				  &val, 1, &src.swizzle);
+        				  &uval, 1, &src.swizzle);
 
    return src;
 }
@@ -1791,7 +1793,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
 
          src = st_src_reg(PROGRAM_CONSTANT, -1, NULL);
          src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-        					values,
+        					(gl_constant_value *) values,
         					ir->type->vector_elements,
         					&src.swizzle);
          emit(ir, TGSI_OPCODE_MOV, mat_column, src);
@@ -1829,7 +1831,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
 
    this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type);
    this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-        					   values,
+        					   (gl_constant_value *) values,
         					   ir->type->vector_elements,
         					   &this->result.swizzle);
 }
@@ -2401,7 +2403,7 @@ add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
           */
          if (file == PROGRAM_SAMPLER) {
             for (unsigned int j = 0; j < size / 4; j++)
-               prog->Parameters->ParameterValues[index + j][0] = next_sampler++;
+               prog->Parameters->ParameterValues[index + j][0].f = next_sampler++;
          }
 
          /* The location chosen in the Parameters list here (returned
@@ -3762,7 +3764,7 @@ st_translate_program(
             else
                t->constants[i] = 
                   ureg_DECL_immediate( ureg,
-                                       proginfo->Parameters->ParameterValues[i],
+                                       (GLfloat *) proginfo->Parameters->ParameterValues[i],
                                        4 );
             break;
          default:

From f95169deb40f8245f4b3b07b17b222746da29bdd Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Mon, 13 Jun 2011 17:52:54 -0500
Subject: [PATCH 152/600] tgsi: add support for TGSI_OPCODE_MOD in tgsi_exec

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 9cf74a838fe..072772eaa7e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2977,6 +2977,17 @@ micro_xor(union tgsi_exec_channel *dst,
    dst->u[3] = src0->u[3] ^ src1->u[3];
 }
 
+static void
+micro_mod(union tgsi_exec_channel *dst,
+          const union tgsi_exec_channel *src0,
+          const union tgsi_exec_channel *src1)
+{
+   dst->i[0] = src0->i[0] % src1->i[0];
+   dst->i[1] = src0->i[1] % src1->i[1];
+   dst->i[2] = src0->i[2] % src1->i[2];
+   dst->i[3] = src0->i[3] % src1->i[3];
+}
+
 static void
 micro_f2i(union tgsi_exec_channel *dst,
           const union tgsi_exec_channel *src)
@@ -3680,7 +3691,7 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_MOD:
-      assert (0);
+      exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
       break;
 
    case TGSI_OPCODE_XOR:

From b191382c60bdcfeb7f424b23aa6ab63de81e2f08 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Mon, 13 Jun 2011 18:12:56 -0500
Subject: [PATCH 153/600] mesa, glsl_to_tgsi: add native support for integers
 in shaders

Disabled by default on all drivers.  To enable it, change ctx->GLSLVersion to 130
in st_extensions.c.  Currently, softpipe is the only driver with integer support.
---
 src/glsl/glsl_types.h                      |  15 +
 src/mesa/main/uniforms.c                   |  38 ++-
 src/mesa/program/prog_parameter.c          |  28 +-
 src/mesa/program/prog_parameter.h          |   5 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 302 ++++++++++++++++++---
 5 files changed, 328 insertions(+), 60 deletions(-)

diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h
index 1b069df74fe..eb9d501858a 100644
--- a/src/glsl/glsl_types.h
+++ b/src/glsl/glsl_types.h
@@ -165,6 +165,21 @@ struct glsl_type {
    static const glsl_type *const mat4x3_type;
    static const glsl_type *const mat4_type;
    /*@}*/
+   
+   /**
+    * Get the built-in instance of the vec4 type for a specific base type
+    */
+   static const glsl_type *get_vec4_type(glsl_base_type base_type)
+   {
+      if (base_type == GLSL_TYPE_FLOAT)
+         return vec4_type;
+      else if (base_type == GLSL_TYPE_INT)
+         return ivec4_type;
+      else if (base_type == GLSL_TYPE_UINT)
+         return uvec4_type;
+      else
+         return NULL;
+   }
 
 
    /**
diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
index 07d46c6404f..ce4863faf78 100644
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -454,8 +454,9 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
             for (i = 0; i < rows; i++) {
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
-                  params[k++] = (GLint)
-                     prog->Parameters->ParameterValues[base][j].f;
+                  params[k++] = ctx->Const.GLSLVersion <= 120 ? 
+                     (GLint) prog->Parameters->ParameterValues[base][j].f : 
+                     prog->Parameters->ParameterValues[base][j].i;
                }
             }
          }
@@ -467,8 +468,9 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
             for (i = 0; i < rows; i++) {
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
-                  params[k++] = (GLuint)
-                     prog->Parameters->ParameterValues[base][j].f;
+                  params[k++] = ctx->Const.GLSLVersion <= 120 ? 
+                     (GLuint) prog->Parameters->ParameterValues[base][j].f : 
+                     prog->Parameters->ParameterValues[base][j].u;
                }
             }
          }
@@ -735,42 +737,52 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program,
 
       /* loop over number of array elements */
       for (k = 0; k < count; k++) {
-         GLfloat *uniformVal;
+         gl_constant_value *uniformVal;
 
          if (offset + k >= slots) {
             /* Extra array data is ignored */
             break;
          }
 
-         /* uniformVal (the destination) is always float[4] */
+         /* uniformVal (the destination) is always gl_constant_value[4] */
          uniformVal = program->Parameters->ParameterValues[index + offset + k];
 
          if (basicType == GL_INT) {
-            /* convert user's ints to floats */
             const GLint *iValues = ((const GLint *) values) + k * elems;
             for (i = 0; i < elems; i++) {
-               uniformVal[i] = (GLfloat) iValues[i];
+               if (ctx->Const.GLSLVersion <= 120)
+                  uniformVal[i].f = (GLfloat) iValues[i];
+               else
+                  uniformVal[i].i = iValues[i];
             }
          }
          else if (basicType == GL_UNSIGNED_INT) {
-            /* convert user's uints to floats */
             const GLuint *iValues = ((const GLuint *) values) + k * elems;
             for (i = 0; i < elems; i++) {
-               uniformVal[i] = (GLfloat) iValues[i];
+               if (ctx->Const.GLSLVersion <= 120)
+                  uniformVal[i].f = (GLfloat)(GLuint) iValues[i];
+               else
+                  uniformVal[i].u = iValues[i];
             }
          }
          else {
             const GLfloat *fValues = ((const GLfloat *) values) + k * elems;
             assert(basicType == GL_FLOAT);
             for (i = 0; i < elems; i++) {
-               uniformVal[i] = fValues[i];
+               uniformVal[i].f = fValues[i];
             }
          }
 
-         /* if the uniform is bool-valued, convert to 1.0 or 0.0 */
+         /* if the uniform is bool-valued, convert to 1 or 0 */
          if (isUniformBool) {
             for (i = 0; i < elems; i++) {
-               uniformVal[i] = uniformVal[i] ? 1.0f : 0.0f;
+               if (basicType == GL_FLOAT)
+                  uniformVal[i].b = uniformVal[i].f != 0.0f ? 1 : 0;
+               else
+                  uniformVal[i].b = uniformVal[i].u ? 1 : 0;
+               
+               if (ctx->Const.GLSLVersion <= 120)
+                  uniformVal[i].f = uniformVal[i].b ? 1.0f : 0.0f;
             }
          }
       }
diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c
index b1cdf8bf2c0..49b3ffbdd5c 100644
--- a/src/mesa/program/prog_parameter.c
+++ b/src/mesa/program/prog_parameter.c
@@ -241,9 +241,9 @@ _mesa_add_named_constant(struct gl_program_parameter_list *paramList,
  * \return index/position of the new parameter in the parameter list.
  */
 GLint
-_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
+_mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList,
                            const gl_constant_value values[4], GLuint size,
-                           GLuint *swizzleOut)
+                           GLenum datatype, GLuint *swizzleOut)
 {
    GLint pos;
    ASSERT(size >= 1);
@@ -276,7 +276,7 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
 
    /* add a new parameter to store this constant */
    pos = _mesa_add_parameter(paramList, PROGRAM_CONSTANT, NULL,
-                             size, GL_NONE, values, NULL, 0x0);
+                             size, datatype, values, NULL, 0x0);
    if (pos >= 0 && swizzleOut) {
       if (size == 1)
          *swizzleOut = SWIZZLE_XXXX;
@@ -286,6 +286,28 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
    return pos;
 }
 
+/**
+ * Add a new unnamed constant to the parameter list.  This will be used
+ * when a fragment/vertex program contains something like this:
+ *    MOV r, { 0, 1, 2, 3 };
+ * If swizzleOut is non-null we'll search the parameter list for an
+ * existing instance of the constant which matches with a swizzle.
+ *
+ * \param paramList  the parameter list
+ * \param values  four float values
+ * \param swizzleOut  returns swizzle mask for accessing the constant
+ * \return index/position of the new parameter in the parameter list.
+ * \sa _mesa_add_typed_unnamed_constant
+ */
+GLint
+_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
+                           const gl_constant_value values[4], GLuint size,
+                           GLuint *swizzleOut)
+{
+   return _mesa_add_typed_unnamed_constant(paramList, values, size, GL_NONE,
+                                           swizzleOut);
+}
+
 /**
  * Add parameter representing a varying variable.
  */
diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h
index dcc171ed745..f858cf0fa0d 100644
--- a/src/mesa/program/prog_parameter.h
+++ b/src/mesa/program/prog_parameter.h
@@ -134,6 +134,11 @@ _mesa_add_named_constant(struct gl_program_parameter_list *paramList,
                          const char *name, const gl_constant_value values[4],
                          GLuint size);
 
+extern GLint
+_mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList,
+                           const gl_constant_value values[4], GLuint size,
+                           GLenum datatype, GLuint *swizzleOut);
+
 extern GLint
 _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
                            const gl_constant_value values[4], GLuint size,
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 881b9e05de1..3f5c0c60226 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -96,11 +96,13 @@ public:
       else
          this->swizzle = SWIZZLE_XYZW;
       this->negate = 0;
+      this->type = type ? type->base_type : GLSL_TYPE_ERROR;
       this->reladdr = NULL;
    }
 
-   st_src_reg(gl_register_file file, int index)
+   st_src_reg(gl_register_file file, int index, int type)
    {
+      this->type = type;
       this->file = file;
       this->index = index;
       this->swizzle = SWIZZLE_XYZW;
@@ -110,6 +112,7 @@ public:
 
    st_src_reg()
    {
+      this->type = GLSL_TYPE_ERROR;
       this->file = PROGRAM_UNDEFINED;
       this->index = 0;
       this->swizzle = 0;
@@ -123,23 +126,26 @@ public:
    int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
    GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
    int negate; /**< NEGATE_XYZW mask from mesa */
+   int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
    /** Register index should be offset by the integer in this reg. */
    st_src_reg *reladdr;
 };
 
 class st_dst_reg {
 public:
-   st_dst_reg(gl_register_file file, int writemask)
+   st_dst_reg(gl_register_file file, int writemask, int type)
    {
       this->file = file;
       this->index = 0;
       this->writemask = writemask;
       this->cond_mask = COND_TR;
       this->reladdr = NULL;
+      this->type = type;
    }
 
    st_dst_reg()
    {
+      this->type = GLSL_TYPE_ERROR;
       this->file = PROGRAM_UNDEFINED;
       this->index = 0;
       this->writemask = 0;
@@ -153,12 +159,14 @@ public:
    int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
    int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
    GLuint cond_mask:4;
+   int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
    /** Register index should be offset by the integer in this reg. */
    st_src_reg *reladdr;
 };
 
 st_src_reg::st_src_reg(st_dst_reg reg)
 {
+   this->type = reg.type;
    this->file = reg.file;
    this->index = reg.index;
    this->swizzle = SWIZZLE_XYZW;
@@ -168,6 +176,7 @@ st_src_reg::st_src_reg(st_dst_reg reg)
 
 st_dst_reg::st_dst_reg(st_src_reg reg)
 {
+   this->type = reg.type;
    this->file = reg.file;
    this->index = reg.index;
    this->writemask = WRITEMASK_XYZW;
@@ -267,6 +276,8 @@ public:
    int samplers_used;
    bool indirect_addr_temps;
    bool indirect_addr_consts;
+   
+   int glsl_version;
 
    variable_storage *find_variable_storage(ir_variable *var);
 
@@ -276,6 +287,8 @@ public:
    void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
 
    st_src_reg st_src_reg_for_float(float val);
+   st_src_reg st_src_reg_for_int(int val);
+   st_src_reg st_src_reg_for_type(int type, int val);
 
    /**
     * \name Visit methods
@@ -327,6 +340,10 @@ public:
    glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
         		        st_dst_reg dst,
         		        st_src_reg src0, st_src_reg src1, st_src_reg src2);
+   
+   unsigned get_opcode(ir_instruction *ir, unsigned op,
+                    st_dst_reg dst,
+                    st_src_reg src0, st_src_reg src1);
 
    /**
     * Emit the correct dot-product instruction for the type of arguments
@@ -343,6 +360,8 @@ public:
    void emit_scalar(ir_instruction *ir, unsigned op,
         	    st_dst_reg dst, st_src_reg src0, st_src_reg src1);
 
+   void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
+
    void emit_scs(ir_instruction *ir, unsigned op,
         	 st_dst_reg dst, const st_src_reg &src);
 
@@ -372,9 +391,9 @@ public:
 
 static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, NULL);
 
-static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP);
+static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
 
-static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X);
+static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT);
 
 static void
 fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
@@ -432,6 +451,8 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
 {
    glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
    int num_reladdr = 0, i;
+   
+   op = get_opcode(ir, op, dst, src0, src1);
 
    /* If we have to do relative addressing, we want to load the ARL
     * reg directly for one of the regs, and preload the other reladdr
@@ -447,7 +468,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
    reladdr_to_temp(ir, &src0, &num_reladdr);
 
    if (dst.reladdr) {
-      emit(ir, TGSI_OPCODE_ARL, address_reg, *dst.reladdr);
+      emit_arl(ir, address_reg, *dst.reladdr);
       num_reladdr--;
    }
    assert(num_reladdr == 0);
@@ -531,6 +552,62 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
    return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
 }
 
+/**
+ * Determines whether to use an integer, unsigned integer, or float opcode 
+ * based on the operands and input opcode, then emits the result.
+ * 
+ * TODO: type checking for remaining TGSI opcodes
+ */
+unsigned
+glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
+        		 st_dst_reg dst,
+        		 st_src_reg src0, st_src_reg src1)
+{
+   int type = GLSL_TYPE_FLOAT;
+   
+   if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
+      type = GLSL_TYPE_FLOAT;
+   else if (glsl_version >= 130)
+      type = src0.type;
+
+#define case4(c, f, i, u) \
+   case TGSI_OPCODE_##c: \
+      if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \
+      else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \
+      else op = TGSI_OPCODE_##f; \
+      break;
+#define case3(f, i, u)  case4(f, f, i, u)
+#define case2fi(f, i)   case4(f, f, i, i)
+#define case2iu(i, u)   case4(i, LAST, i, u)
+   
+   switch(op) {
+      case2fi(ADD, UADD);
+      case2fi(MUL, UMUL);
+      case2fi(MAD, UMAD);
+      case3(DIV, IDIV, UDIV);
+      case3(MAX, IMAX, UMAX);
+      case3(MIN, IMIN, UMIN);
+      case2iu(MOD, UMOD);
+      
+      case2fi(SEQ, USEQ);
+      case2fi(SNE, USNE);
+      case3(SGE, ISGE, USGE);
+      case3(SLT, ISLT, USLT);
+      
+      case2iu(SHL, SHL);
+      case2iu(ISHR, USHR);
+      case2iu(NOT, NOT);
+      case2iu(AND, AND);
+      case2iu(OR, OR);
+      case2iu(XOR, XOR);
+      
+      default: break;
+   }
+   
+   assert(op != TGSI_OPCODE_LAST);
+   return op;
+}
+
 void
 glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
         		    st_dst_reg dst, st_src_reg src0, st_src_reg src1,
@@ -607,6 +684,22 @@ glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
    emit_scalar(ir, op, dst, src0, undef);
 }
 
+void
+glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
+        		        st_dst_reg dst, st_src_reg src0)
+{
+   st_src_reg tmp = get_temp(glsl_type::float_type);
+
+   if (src0.type == GLSL_TYPE_INT)
+      emit(ir, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0);
+   else if (src0.type == GLSL_TYPE_UINT)
+      emit(ir, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0);
+   else
+      tmp = src0;
+   
+   emit(ir, TGSI_OPCODE_ARL, dst, tmp);
+}
+
 /**
  * Emit an TGSI_OPCODE_SCS instruction
  *
@@ -705,16 +798,41 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
 struct st_src_reg
 glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
 {
-   st_src_reg src(PROGRAM_CONSTANT, -1, NULL);
+   st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_FLOAT);
    union gl_constant_value uval;
 
    uval.f = val;
-   src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-        				  &uval, 1, &src.swizzle);
+   src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
+        				  &uval, 1, GL_FLOAT, &src.swizzle);
 
    return src;
 }
 
+struct st_src_reg
+glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
+{
+   st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_INT);
+   union gl_constant_value uval;
+   
+   assert(glsl_version >= 130);
+
+   uval.i = val;
+   src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
+        				  &uval, 1, GL_INT, &src.swizzle);
+
+   return src;
+}
+
+struct st_src_reg
+glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
+{
+   if (glsl_version >= 130)
+      return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : 
+                                       st_src_reg_for_int(val);
+   else
+      return st_src_reg_for_float(val);
+}
+
 static int
 type_size(const struct glsl_type *type)
 {
@@ -759,8 +877,7 @@ type_size(const struct glsl_type *type)
 /**
  * In the initial pass of codegen, we assign temporary numbers to
  * intermediate results.  (not SSA -- variable assignments will reuse
- * storage).  Actual register allocation for the Mesa VM occurs in a
- * pass over the Mesa IR later.
+ * storage).
  */
 st_src_reg
 glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
@@ -769,6 +886,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
    int swizzle[4];
    int i;
 
+   src.type = type->base_type;
    src.file = PROGRAM_TEMPORARY;
    src.index = next_temp;
    src.reladdr = NULL;
@@ -875,7 +993,8 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
          this->variables.push_tail(storage);
          this->next_temp += type_size(ir->type);
 
-         dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, NULL));
+         dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
+               glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT));
       }
 
 
@@ -890,7 +1009,8 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
                assert(index == storage->index + (int)i);
             }
          } else {
-            st_src_reg src(PROGRAM_STATE_VAR, index, NULL);
+            st_src_reg src(PROGRAM_STATE_VAR, index,
+                  glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT);
             src.swizzle = slots[i].swizzle;
             emit(ir, TGSI_OPCODE_MOV, dst, src);
             /* even a float takes up a whole vec4 reg in a struct/array. */
@@ -1058,7 +1178,7 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
    if (!reg->reladdr)
       return;
 
-   emit(ir, TGSI_OPCODE_ARL, address_reg, *reg->reladdr);
+   emit_arl(ir, address_reg, *reg->reladdr);
 
    if (*num_reladdr != 1) {
       st_src_reg temp = get_temp(glsl_type::vec4_type);
@@ -1131,13 +1251,19 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
 
    switch (ir->operation) {
    case ir_unop_logic_not:
-      emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_float(0.0));
+      emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0));
       break;
    case ir_unop_neg:
-      op[0].negate = ~op[0].negate;
-      result_src = op[0];
+      assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT);
+      if (result_dst.type == GLSL_TYPE_INT)
+         emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
+      else {
+         op[0].negate = ~op[0].negate;
+         result_src = op[0];
+      }
       break;
    case ir_unop_abs:
+      assert(result_dst.type == GLSL_TYPE_FLOAT);
       emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
       break;
    case ir_unop_sign:
@@ -1200,9 +1326,16 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
       break;
    case ir_binop_div:
-      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
+      if (result_dst.type == GLSL_TYPE_FLOAT)
+         assert(!"not reached: should be handled by ir_div_to_mul_rcp");
+      else
+         emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
+      break;
    case ir_binop_mod:
-      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
+      if (result_dst.type == GLSL_TYPE_FLOAT)
+         assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
+      else
+         emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
       break;
 
    case ir_binop_less:
@@ -1227,7 +1360,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       /* "==" operator producing a scalar boolean. */
       if (ir->operands[0]->type->is_vector() ||
           ir->operands[1]->type->is_vector()) {
-         st_src_reg temp = get_temp(glsl_type::vec4_type);
+         st_src_reg temp = get_temp(glsl_version >= 130 ? 
+               glsl_type::get_vec4_type(ir->operands[0]->type->base_type) : 
+               glsl_type::vec4_type);
+         assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
          emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
          emit_dp(ir, result_dst, temp, temp, vector_elements);
          emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0));
@@ -1239,7 +1375,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       /* "!=" operator producing a scalar boolean. */
       if (ir->operands[0]->type->is_vector() ||
           ir->operands[1]->type->is_vector()) {
-         st_src_reg temp = get_temp(glsl_type::vec4_type);
+         st_src_reg temp = get_temp(glsl_version >= 130 ? 
+               glsl_type::get_vec4_type(ir->operands[0]->type->base_type) : 
+               glsl_type::vec4_type);
+         assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
          emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
          emit_dp(ir, result_dst, temp, temp, vector_elements);
          emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
@@ -1291,17 +1430,24 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       break;
    case ir_unop_i2f:
    case ir_unop_b2f:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
+         break;
+      }
    case ir_unop_b2i:
-      /* Mesa IR lacks types, ints are stored as truncated floats. */
+      /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */
       result_src = op[0];
       break;
    case ir_unop_f2i:
-      emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
+      if (glsl_version >= 130)
+         emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
+      else
+         emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
       break;
    case ir_unop_f2b:
    case ir_unop_i2b:
-      emit(ir, TGSI_OPCODE_SNE, result_dst,
-        		  op[0], st_src_reg_for_float(0.0));
+      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], 
+            st_src_reg_for_type(result_dst.type, 0));
       break;
    case ir_unop_trunc:
       emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
@@ -1329,12 +1475,40 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       break;
 
    case ir_unop_bit_not:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
+         break;
+      }
    case ir_unop_u2f:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
+         break;
+      }
    case ir_binop_lshift:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]);
+         break;
+      }
    case ir_binop_rshift:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]);
+         break;
+      }
    case ir_binop_bit_and:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_AND, result_dst, op[0]);
+         break;
+      }
    case ir_binop_bit_xor:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]);
+         break;
+      }
    case ir_binop_bit_or:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_OR, result_dst, op[0]);
+         break;
+      }
    case ir_unop_round_even:
       assert(!"GLSL 1.30 features unsupported");
       break;
@@ -1729,7 +1903,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
 {
    st_src_reg src;
    GLfloat stack_vals[4] = { 0 };
-   GLfloat *values = stack_vals;
+   gl_constant_value *values = (gl_constant_value *) stack_vals;
+   GLenum gl_type = GL_NONE;
    unsigned int i;
 
    /* Unfortunately, 4 floats is all we can get into
@@ -1737,7 +1912,6 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
     * aggregate constant and move each constant value into it.  If we
     * get lucky, copy propagation will eliminate the extra moves.
     */
-
    if (ir->type->base_type == GLSL_TYPE_STRUCT) {
       st_src_reg temp_base = get_temp(ir->type);
       st_dst_reg temp = st_dst_reg(temp_base);
@@ -1789,13 +1963,13 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
 
       for (i = 0; i < ir->type->matrix_columns; i++) {
          assert(ir->type->base_type == GLSL_TYPE_FLOAT);
-         values = &ir->value.f[i * ir->type->vector_elements];
+         values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
 
-         src = st_src_reg(PROGRAM_CONSTANT, -1, NULL);
+         src = st_src_reg(PROGRAM_CONSTANT, -1, ir->type->base_type);
          src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-        					(gl_constant_value *) values,
-        					ir->type->vector_elements,
-        					&src.swizzle);
+                                                values,
+                                                ir->type->vector_elements,
+                                                &src.swizzle);
          emit(ir, TGSI_OPCODE_MOV, mat_column, src);
 
          mat_column.index++;
@@ -1808,21 +1982,36 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
    src.file = PROGRAM_CONSTANT;
    switch (ir->type->base_type) {
    case GLSL_TYPE_FLOAT:
-      values = &ir->value.f[0];
+      gl_type = GL_FLOAT;
+      for (i = 0; i < ir->type->vector_elements; i++) {
+         values[i].f = ir->value.f[i];
+      }
       break;
    case GLSL_TYPE_UINT:
+      gl_type = glsl_version >= 130 ? GL_UNSIGNED_INT : GL_FLOAT;
       for (i = 0; i < ir->type->vector_elements; i++) {
-         values[i] = ir->value.u[i];
+         if (glsl_version >= 130)
+            values[i].u = ir->value.u[i];
+         else
+            values[i].f = ir->value.u[i];
       }
       break;
    case GLSL_TYPE_INT:
+      gl_type = glsl_version >= 130 ? GL_INT : GL_FLOAT;
       for (i = 0; i < ir->type->vector_elements; i++) {
-         values[i] = ir->value.i[i];
+         if (glsl_version >= 130)
+            values[i].i = ir->value.i[i];
+         else
+            values[i].f = ir->value.i[i];
       }
       break;
    case GLSL_TYPE_BOOL:
+      gl_type = glsl_version >= 130 ? GL_BOOL : GL_FLOAT;
       for (i = 0; i < ir->type->vector_elements; i++) {
-         values[i] = ir->value.b[i];
+         if (glsl_version >= 130)
+            values[i].b = ir->value.b[i];
+         else
+            values[i].f = ir->value.b[i];
       }
       break;
    default:
@@ -1830,9 +2019,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
    }
 
    this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type);
-   this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-        					   (gl_constant_value *) values,
-        					   ir->type->vector_elements,
+   this->result.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
+        					   values, ir->type->vector_elements, gl_type,
         					   &this->result.swizzle);
 }
 
@@ -2535,6 +2723,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
 {
    GLuint i;
    GLint outputMap[VERT_RESULT_MAX];
+   GLint outputTypes[VERT_RESULT_MAX];
    GLuint numVaryingReads = 0;
    GLboolean usedTemps[MAX_PROGRAM_TEMPS];
    GLuint firstTemp = 0;
@@ -2562,6 +2751,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
                outputMap[var] = _mesa_find_free_register(usedTemps,
                                                          MAX_PROGRAM_TEMPS,
                                                          firstTemp);
+               outputTypes[var] = inst->src[j].type;
                firstTemp = outputMap[var] + 1;
             }
             inst->src[j].file = PROGRAM_TEMPORARY;
@@ -2587,8 +2777,8 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
    for (i = 0; i < VERT_RESULT_MAX; i++) {
       if (outputMap[i] >= 0) {
          /* MOV VAR[i], TEMP[tmp]; */
-         st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i]);
-         st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW);
+         st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]);
+         st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]);
          dst.index = i;
          this->emit(NULL, TGSI_OPCODE_MOV, dst, src);
       }
@@ -3762,10 +3952,33 @@ st_translate_program(
             if (program->indirect_addr_consts)
                t->constants[i] = ureg_DECL_constant( ureg, i );
             else
-               t->constants[i] = 
-                  ureg_DECL_immediate( ureg,
-                                       (GLfloat *) proginfo->Parameters->ParameterValues[i],
-                                       4 );
+               switch(proginfo->Parameters->Parameters[i].DataType)
+               {
+               case GL_FLOAT:
+               case GL_FLOAT_VEC2:
+               case GL_FLOAT_VEC3:
+               case GL_FLOAT_VEC4:
+                  t->constants[i] = ureg_DECL_immediate(ureg, (float *)proginfo->Parameters->ParameterValues[i], 4);
+                  break;
+               case GL_INT:
+               case GL_INT_VEC2:
+               case GL_INT_VEC3:
+               case GL_INT_VEC4:
+                  t->constants[i] = ureg_DECL_immediate_int(ureg, (int *)proginfo->Parameters->ParameterValues[i], 4);
+                  break;
+               case GL_UNSIGNED_INT:
+               case GL_UNSIGNED_INT_VEC2:
+               case GL_UNSIGNED_INT_VEC3:
+               case GL_UNSIGNED_INT_VEC4:
+               case GL_BOOL:
+               case GL_BOOL_VEC2:
+               case GL_BOOL_VEC3:
+               case GL_BOOL_VEC4:
+                  t->constants[i] = ureg_DECL_immediate_uint(ureg, (unsigned *)proginfo->Parameters->ParameterValues[i], 4);
+                  break;
+               default:
+                  assert(!"should not get here");
+               }
             break;
          default:
             break;
@@ -3874,6 +4087,7 @@ get_mesa_program(struct gl_context *ctx,
    v->prog = prog;
    v->shader_program = shader_program;
    v->options = options;
+   v->glsl_version = ctx->Const.GLSLVersion;
 
    add_uniforms_to_parameters_list(shader_program, shader, prog);
 

From b2c067e3075414703a7ebad439d4290c27cab46a Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Tue, 14 Jun 2011 17:38:14 -0500
Subject: [PATCH 154/600] glsl-to-tgsi: fix piglit tests

This commit fixes all of the piglit tests regressed by "mesa, glsl_to_tgsi: add
native support for integers in shaders" on softpipe.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 3f5c0c60226..49613fccda7 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -886,7 +886,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
    int swizzle[4];
    int i;
 
-   src.type = type->base_type;
+   src.type = glsl_version >= 130 ? type->base_type : GLSL_TYPE_FLOAT;
    src.file = PROGRAM_TEMPORARY;
    src.index = next_temp;
    src.reladdr = NULL;
@@ -1632,6 +1632,8 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
    }
 
    this->result = st_src_reg(entry->file, entry->index, var->type);
+   if (glsl_version <= 120)
+      this->result.type = GLSL_TYPE_FLOAT;
 }
 
 void
@@ -1966,10 +1968,11 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
          values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
 
          src = st_src_reg(PROGRAM_CONSTANT, -1, ir->type->base_type);
-         src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-                                                values,
-                                                ir->type->vector_elements,
-                                                &src.swizzle);
+         src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
+                                                      values,
+                                                      ir->type->vector_elements,
+                                                      GL_FLOAT,
+                                                      &src.swizzle);
          emit(ir, TGSI_OPCODE_MOV, mat_column, src);
 
          mat_column.index++;
@@ -4142,15 +4145,14 @@ get_mesa_program(struct gl_context *ctx,
    if (target == GL_VERTEX_PROGRAM_ARB)
       v->remove_output_reads(PROGRAM_VARYING);
 
-   /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
-   v->copy_propagate();
-   
-   /* FIXME: These passes to optimize temporary registers don't work when there
+   /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor.
+    * FIXME: These passes to optimize temporary registers don't work when there
     * is indirect addressing of the temporary register space.  We need proper 
     * array support so that we don't have to give up these passes in every 
     * shader that uses arrays.
     */
    if (!v->indirect_addr_temps) {
+      v->copy_propagate();
       v->merge_registers();
       v->eliminate_dead_code();
       v->renumber_registers();

From bf1cee9f24022e3da96d84fdc6baaa050d3eadf1 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Tue, 14 Jun 2011 18:17:40 -0500
Subject: [PATCH 155/600] glsl_to_tgsi: finish some loose ends

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 46 +++++++++++++++++-----
 1 file changed, 36 insertions(+), 10 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 49613fccda7..438f21483c7 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2200,7 +2200,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
       ir->lod_info.grad.dPdy->accept(this);
       dy = this->result;
       break;
-   case ir_txf: // TODO: use TGSI_OPCODE_TXF here
+   case ir_txf: /* TODO: use TGSI_OPCODE_TXF here */
       assert(!"GLSL 1.30 features unsupported");
       break;
    }
@@ -3731,6 +3731,37 @@ emit_wpos(struct st_context *st,
    emit_wpos_inversion(t, program, invert);
 }
 
+/**
+ * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
+ * TGSI uses +1 for front, -1 for back.
+ * This function converts the TGSI value to the GL value.  Simply clamping/
+ * saturating the value to [0,1] does the job.
+ */
+static void
+emit_face_var(struct st_translate *t)
+{
+   struct ureg_program *ureg = t->ureg;
+   struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
+   struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]];
+
+   /* MOV_SAT face_temp, input[face] */
+   face_temp = ureg_saturate(face_temp);
+   ureg_MOV(ureg, face_temp, face_input);
+
+   /* Use face_temp as face input from here on: */
+   t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
+}
+
+static void
+emit_edgeflags(struct st_translate *t)
+{
+   struct ureg_program *ureg = t->ureg;
+   struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]];
+   struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
+
+   ureg_MOV(ureg, edge_dst, edge_src);
+}
+
 /**
  * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
  * \param program  the program to translate
@@ -3800,15 +3831,11 @@ st_translate_program(
          /* Must do this after setting up t->inputs, and before
           * emitting constant references, below:
           */
-          printf("FRAG_BIT_WPOS\n");
           emit_wpos(st_context(ctx), t, proginfo, ureg);
       }
 
-      if (proginfo->InputsRead & FRAG_BIT_FACE) {
-         // TODO: uncomment
-         printf("FRAG_BIT_FACE\n");
-         //emit_face_var( t, program );
-      }
+      if (proginfo->InputsRead & FRAG_BIT_FACE)
+         emit_face_var(t);
 
       /*
        * Declare output attributes.
@@ -3875,7 +3902,6 @@ st_translate_program(
                /* XXX: note we are modifying the incoming shader here!  Need to
                * do this before emitting the constant decls below, or this
                * will be missed.
-               * XXX: depends on "Parameters" field specific to Mesa IR
                */
             unsigned pointSizeClampConst =
                _mesa_add_state_reference(proginfo->Parameters,
@@ -3887,8 +3913,8 @@ st_translate_program(
             t->outputs[i] = psizregtemp;
          }
       }
-      /*if (passthrough_edgeflags)
-         emit_edgeflags( t, program ); */ // TODO: uncomment
+      if (passthrough_edgeflags)
+         emit_edgeflags(t);
    }
 
    /* Declare address register.

From b30bbd7436bdb9727d3766ba9c07abd610e6dda8 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Wed, 15 Jun 2011 14:45:03 -0500
Subject: [PATCH 156/600] glsl_to_tgsi: silence compiler warning

---
 src/mesa/state_tracker/st_mesa_to_tgsi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 75842286ba8..656c985d78f 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -1207,7 +1207,7 @@ st_translate_mesa_program(
             else
                t->constants[i] = 
                   ureg_DECL_immediate( ureg,
-                                       program->Parameters->ParameterValues[i],
+                                       (const float*) program->Parameters->ParameterValues[i],
                                        4 );
             break;
          default:

From 1141c3f4c4014e3c2834db65b96a3ba7cc78744a Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Wed, 15 Jun 2011 17:31:51 -0500
Subject: [PATCH 157/600] glsl: remove glsl_type::get_vec4_type()

Thanks to Kenneth Graunke for pointing out that glsl_type::get_instance(base, 4, 1)
is the same as glsl_type::get_vec4_type(base).

The function was only used in st_glsl_to_tgsi, and this commit replaces that usage
with get_instance.
---
 src/glsl/glsl_types.h                      | 15 ---------------
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  4 ++--
 2 files changed, 2 insertions(+), 17 deletions(-)

diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h
index eb9d501858a..1b069df74fe 100644
--- a/src/glsl/glsl_types.h
+++ b/src/glsl/glsl_types.h
@@ -165,21 +165,6 @@ struct glsl_type {
    static const glsl_type *const mat4x3_type;
    static const glsl_type *const mat4_type;
    /*@}*/
-   
-   /**
-    * Get the built-in instance of the vec4 type for a specific base type
-    */
-   static const glsl_type *get_vec4_type(glsl_base_type base_type)
-   {
-      if (base_type == GLSL_TYPE_FLOAT)
-         return vec4_type;
-      else if (base_type == GLSL_TYPE_INT)
-         return ivec4_type;
-      else if (base_type == GLSL_TYPE_UINT)
-         return uvec4_type;
-      else
-         return NULL;
-   }
 
 
    /**
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 438f21483c7..5fedf263090 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1361,7 +1361,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       if (ir->operands[0]->type->is_vector() ||
           ir->operands[1]->type->is_vector()) {
          st_src_reg temp = get_temp(glsl_version >= 130 ? 
-               glsl_type::get_vec4_type(ir->operands[0]->type->base_type) : 
+               glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
                glsl_type::vec4_type);
          assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
          emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
@@ -1376,7 +1376,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       if (ir->operands[0]->type->is_vector() ||
           ir->operands[1]->type->is_vector()) {
          st_src_reg temp = get_temp(glsl_version >= 130 ? 
-               glsl_type::get_vec4_type(ir->operands[0]->type->base_type) : 
+               glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
                glsl_type::vec4_type);
          assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
          emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);

From 552cc48fca9b932fceb3d8fa7f9d0067f46b67c2 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Thu, 16 Jun 2011 13:42:57 -0500
Subject: [PATCH 158/600] glsl_to_tgsi: fix compile error with g++ 4.6

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 5fedf263090..6c92441a105 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -389,7 +389,7 @@ public:
    void *mem_ctx;
 };
 
-static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, NULL);
+static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
 
 static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
 

From 29d21417e38aed0f0710d3692df320728aef90b1 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Thu, 16 Jun 2011 18:36:16 -0500
Subject: [PATCH 159/600] glsl_to_tgsi: implement simplify_cmp pass needed by
 r300g

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 95 ++++++++++++++++++++++
 1 file changed, 95 insertions(+)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 6c92441a105..322bfbbf1ab 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -374,6 +374,7 @@ public:
    bool process_move_condition(ir_rvalue *ir);
 
    void remove_output_reads(gl_register_file type);
+   void simplify_cmp(void);
 
    void rename_temp_register(int index, int new_index);
    int get_first_temp_read(int index);
@@ -2788,6 +2789,97 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
    }
 }
 
+/**
+ * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
+ * are read from the given src in this instruction
+ */
+static int
+get_src_arg_mask(st_dst_reg dst, st_src_reg src)
+{
+   int read_mask = 0, comp;
+
+   /* Now, given the src swizzle and the written channels, find which
+    * components are actually read
+    */
+   for (comp = 0; comp < 4; ++comp) {
+      const unsigned coord = GET_SWZ(src.swizzle, comp);
+      ASSERT(coord < 4);
+      if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W)
+         read_mask |= 1 << coord;
+   }
+
+   return read_mask;
+}
+
+/**
+ * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
+ * instruction is the first instruction to write to register T0.  There are
+ * several lowering passes done in GLSL IR (e.g. branches and
+ * relative addressing) that create a large number of conditional assignments
+ * that ir_to_mesa converts to CMP instructions like the one mentioned above.
+ *
+ * Here is why this conversion is safe:
+ * CMP T0, T1 T2 T0 can be expanded to:
+ * if (T1 < 0.0)
+ * 	MOV T0, T2;
+ * else
+ * 	MOV T0, T0;
+ *
+ * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
+ * as the original program.  If (T1 < 0.0) evaluates to false, executing
+ * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
+ * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
+ * because any instruction that was going to read from T0 after this was going
+ * to read a garbage value anyway.
+ */
+void
+glsl_to_tgsi_visitor::simplify_cmp(void)
+{
+   unsigned tempWrites[MAX_PROGRAM_TEMPS];
+   unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
+
+   memset(tempWrites, 0, sizeof(tempWrites));
+   memset(outputWrites, 0, sizeof(outputWrites));
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      unsigned prevWriteMask = 0;
+
+      /* Give up if we encounter relative addressing or flow control. */
+      if (inst->dst.reladdr ||
+          tgsi_get_opcode_info(inst->op)->is_branch ||
+          inst->op == TGSI_OPCODE_BGNSUB ||
+          inst->op == TGSI_OPCODE_CONT ||
+          inst->op == TGSI_OPCODE_END ||
+          inst->op == TGSI_OPCODE_ENDSUB ||
+          inst->op == TGSI_OPCODE_RET) {
+         return;
+      }
+
+      if (inst->dst.file == PROGRAM_OUTPUT) {
+         assert(inst->dst.index < MAX_PROGRAM_OUTPUTS);
+         prevWriteMask = outputWrites[inst->dst.index];
+         outputWrites[inst->dst.index] |= inst->dst.writemask;
+      } else if (inst->dst.file == PROGRAM_TEMPORARY) {
+         assert(inst->dst.index < MAX_PROGRAM_TEMPS);
+         prevWriteMask = tempWrites[inst->dst.index];
+         tempWrites[inst->dst.index] |= inst->dst.writemask;
+      }
+
+      /* For a CMP to be considered a conditional write, the destination
+       * register and source register two must be the same. */
+      if (inst->op == TGSI_OPCODE_CMP
+          && !(inst->dst.writemask & prevWriteMask)
+          && inst->src[2].file == inst->dst.file
+          && inst->src[2].index == inst->dst.index
+          && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) {
+
+         inst->op = TGSI_OPCODE_MOV;
+         inst->src[0] = inst->src[1];
+      }
+   }
+}
+
 /* Replaces all references to a temporary register index with another index. */
 void
 glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
@@ -4170,6 +4262,9 @@ get_mesa_program(struct gl_context *ctx,
    v->remove_output_reads(PROGRAM_OUTPUT);
    if (target == GL_VERTEX_PROGRAM_ARB)
       v->remove_output_reads(PROGRAM_VARYING);
+   
+   /* Perform the simplify_cmp optimization, which is required by r300g. */
+   v->simplify_cmp();
 
    /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor.
     * FIXME: These passes to optimize temporary registers don't work when there

From 8c50f18b29637470539d05ccc32b0cae0092aeac Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.l.velikov@gmail.com>
Date: Tue, 21 Jun 2011 21:52:19 +0100
Subject: [PATCH 160/600] glsl_to_tgsi: execute merge_registers() after
 eliminate_dead_code()

Fixes a regression unintentionally introduced by "glsl_to_tgsi: fix shaders with
indirect addressing of temps" that caused missing leaves in 3dmark01 test 4 (Nature)
and missing/displaced textures on human models in Counter-Strike: Source.

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Signed-off-by: Bryan Cain <bryancain3@gmail.com>
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 322bfbbf1ab..abeb44a4083 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -4274,8 +4274,8 @@ get_mesa_program(struct gl_context *ctx,
     */
    if (!v->indirect_addr_temps) {
       v->copy_propagate();
-      v->merge_registers();
       v->eliminate_dead_code();
+      v->merge_registers();
       v->renumber_registers();
    }
    

From 8b881ad1c3d9dd3c96afbdbb608a7240d40e9c92 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Thu, 23 Jun 2011 19:35:36 -0500
Subject: [PATCH 161/600] glsl_to_tgsi: use swizzle_for_size for src reg in
 conditional moves

This prevents the copy propagation pass from being confused by undefined
channels and thus missing optimization opportunities.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index abeb44a4083..6d76686ab5d 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1882,10 +1882,13 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
       st_src_reg condition = this->result;
 
       for (i = 0; i < type_size(ir->lhs->type); i++) {
+         st_src_reg l_src = st_src_reg(l);
+         l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements);
+         
          if (switch_order) {
-            emit(ir, TGSI_OPCODE_CMP, l, condition, st_src_reg(l), r);
+            emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r);
          } else {
-            emit(ir, TGSI_OPCODE_CMP, l, condition, r, st_src_reg(l));
+            emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src);
          }
 
          l.index++;

From 7ec7dd4fb6ae6c8aa29988754476e1212eb986ef Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Thu, 23 Jun 2011 19:53:37 -0500
Subject: [PATCH 162/600] glsl_to_tgsi: remove handling of XPD opcode in
 compile_tgsi_instruction()

The opcode is never emitted by the glsl_to_tgsi_visitor, so its special case in
compile_tgsi_instruction() was dead code.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 6d76686ab5d..721ba28d61f 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3653,14 +3653,6 @@ compile_tgsi_instruction(struct st_translate *t,
                  src, num_src );
       break;
 
-   case TGSI_OPCODE_XPD:
-      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
-      ureg_insn( ureg, 
-                 inst->op, 
-                 dst, num_dst, 
-                 src, num_src );
-      break;
-
    default:
       ureg_insn( ureg, 
                  inst->op, 

From 41472f7809dcff114223b8fadc5b97baff6060a9 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Fri, 24 Jun 2011 18:45:04 -0500
Subject: [PATCH 163/600] glsl_to_tgsi: add a better, more advanced dead code
 elimination pass

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 140 +++++++++++++++++++++
 1 file changed, 140 insertions(+)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 721ba28d61f..d47364fabb6 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -208,6 +208,7 @@ public:
    int sampler; /**< sampler index */
    int tex_target; /**< One of TEXTURE_*_INDEX */
    GLboolean tex_shadow;
+   int dead_mask; /**< Used in dead code elimination */
 
    class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
 };
@@ -384,6 +385,7 @@ public:
 
    void copy_propagate(void);
    void eliminate_dead_code(void);
+   int eliminate_dead_code_advanced(void);
    void merge_registers(void);
    void renumber_registers(void);
 
@@ -480,6 +482,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
    inst->src[1] = src1;
    inst->src[2] = src2;
    inst->ir = ir;
+   inst->dead_mask = 0;
 
    inst->function = NULL;
    
@@ -3257,6 +3260,142 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
    }
 }
 
+/*
+ * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
+ * code elimination.  This is less primitive than eliminate_dead_code(), as it
+ * is per-channel and can detect consecutive writes without a read between them
+ * as dead code.  However, there is some dead code that can be eliminated by 
+ * eliminate_dead_code() but not this function - for example, this function 
+ * cannot eliminate an instruction writing to a register that is never read and
+ * is the only instruction writing to that register.
+ *
+ * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
+ * will occur.
+ */
+int
+glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
+{
+   glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
+                                                     glsl_to_tgsi_instruction *,
+                                                     this->next_temp * 4);
+   int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
+   int level = 0;
+   int removed = 0;
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      assert(inst->dst.file != PROGRAM_TEMPORARY
+             || inst->dst.index < this->next_temp);
+      
+      switch (inst->op) {
+      case TGSI_OPCODE_BGNLOOP:
+      case TGSI_OPCODE_ENDLOOP:
+         /* End of a basic block, clear the write array entirely.
+          * FIXME: This keeps us from killing dead code when the writes are
+          * on either side of a loop, even when the register isn't touched
+          * inside the loop.
+          */
+         memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
+         break;
+
+      case TGSI_OPCODE_IF:
+         ++level;
+         break;
+
+      case TGSI_OPCODE_ENDIF:
+         --level;
+         break;
+
+      case TGSI_OPCODE_ELSE:
+         /* Clear all channels written inside the preceding if block from the
+          * write array, but leave those that were not touched.
+          *
+          * FIXME: This destroys opportunities to remove dead code inside of
+          * IF blocks that are followed by an ELSE block.
+          */
+         for (int r = 0; r < this->next_temp; r++) {
+            for (int c = 0; c < 4; c++) {
+               if (!writes[4 * r + c])
+        	         continue;
+
+               if (write_level[4 * r + c] >= level)
+        	         writes[4 * r + c] = NULL;
+            }
+         }
+         break;
+
+      default:
+         /* Continuing the block, clear any channels from the write array that
+          * are read by this instruction.
+          */
+         for (int i = 0; i < 4; i++) {
+            if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
+               /* Any temporary might be read, so no dead code elimination 
+                * across this instruction.
+                */
+               memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
+            } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
+               /* Clear where it's used as src. */
+               int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
+               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
+               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
+               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);
+               
+               for (int c = 0; c < 4; c++) {
+              	   if (src_chans & (1 << c)) {
+              	      writes[4 * inst->src[i].index + c] = NULL;
+              	   }
+               }
+            }
+         }
+         break;
+      }
+
+      /* If this instruction writes to a temporary, add it to the write array.
+       * If there is already an instruction in the write array for one or more
+       * of the channels, flag that channel write as dead.
+       */
+      if (inst->dst.file == PROGRAM_TEMPORARY &&
+          !inst->dst.reladdr &&
+          !inst->saturate) {
+         for (int c = 0; c < 4; c++) {
+            if (inst->dst.writemask & (1 << c)) {
+               if (writes[4 * inst->dst.index + c]) {
+                  if (write_level[4 * inst->dst.index + c] < level)
+                     continue;
+                  else
+                     writes[4 * inst->dst.index + c]->dead_mask |= (1 << c);
+               }
+               writes[4 * inst->dst.index + c] = inst;
+               write_level[4 * inst->dst.index + c] = level;
+            }
+         }
+      }
+   }
+
+   /* Now actually remove the instructions that are completely dead and update
+    * the writemask of other instructions with dead channels.
+    */
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      
+      if (!inst->dead_mask || !inst->dst.writemask)
+         continue;
+      else if (inst->dead_mask == inst->dst.writemask) {
+         iter.remove();
+         delete inst;
+         removed++;
+      } else
+         inst->dst.writemask &= ~(inst->dead_mask);
+   }
+
+   ralloc_free(write_level);
+   ralloc_free(writes);
+   
+   return removed;
+}
+
 /* Merges temporary registers together where possible to reduce the number of 
  * registers needed to run a program.
  * 
@@ -4269,6 +4408,7 @@ get_mesa_program(struct gl_context *ctx,
     */
    if (!v->indirect_addr_temps) {
       v->copy_propagate();
+      while (v->eliminate_dead_code_advanced());
       v->eliminate_dead_code();
       v->merge_registers();
       v->renumber_registers();

From 194732fd7299481dd57815f46a594d155260ce17 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Fri, 24 Jun 2011 20:37:53 -0500
Subject: [PATCH 164/600] glsl_to_tgsi: use a more specific condition for
 gl_FragDepth hack in generating assignments

This reduces the number of instructions in the fragment shader of
glsl-fs-atan-2 from 174 to 146 with EmitNoIfs enabled.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index d47364fabb6..5f22f7091d6 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1841,7 +1841,8 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
    if (ir->write_mask == 0) {
       assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
       l.writemask = WRITEMASK_XYZW;
-   } else if (ir->lhs->type->is_scalar()) {
+   } else if (ir->lhs->type->is_scalar() &&
+              ir->lhs->variable_referenced()->mode == ir_var_out) {
       /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
        * FINISHME: W component of fragment shader output zero, work correctly.
        */
@@ -1851,7 +1852,6 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
       int first_enabled_chan = 0;
       int rhs_chan = 0;
 
-      assert(ir->lhs->type->is_vector());
       l.writemask = ir->write_mask;
 
       for (int i = 0; i < 4; i++) {

From 3bd06e5b82b438041f50e2469be9ea68bf3b4300 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Fri, 24 Jun 2011 22:32:26 -0500
Subject: [PATCH 165/600] glsl_to_tgsi: use the correct writemask in
 try_emit_mad() and try_emit_sat()

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 5f22f7091d6..13573fc1b94 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1133,6 +1133,7 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
 {
    int nonmul_operand = 1 - mul_operand;
    st_src_reg a, b, c;
+   st_dst_reg result_dst;
 
    ir_expression *expr = ir->operands[mul_operand]->as_expression();
    if (!expr || expr->operation != ir_binop_mul)
@@ -1146,7 +1147,9 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
    c = this->result;
 
    this->result = get_temp(ir->type);
-   emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, c);
+   result_dst = st_dst_reg(this->result);
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+   emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
 
    return true;
 }
@@ -1168,8 +1171,10 @@ glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
    st_src_reg src = this->result;
 
    this->result = get_temp(ir->type);
+   st_dst_reg result_dst = st_dst_reg(this->result);
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
    glsl_to_tgsi_instruction *inst;
-   inst = emit(ir, TGSI_OPCODE_MOV, st_dst_reg(this->result), src);
+   inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
    inst->saturate = true;
 
    return true;

From 71cbc9e3c4c9ef6090ee31e87601ae64af26321e Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Fri, 24 Jun 2011 23:17:30 -0500
Subject: [PATCH 166/600] glsl_to_tgsi: improve eliminate_dead_code_advanced()

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 13573fc1b94..15a1a3c51c4 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3379,6 +3379,15 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
       }
    }
 
+   /* Anything still in the write array at this point is dead code. */
+   for (int r = 0; r < this->next_temp; r++) {
+      for (int c = 0; c < 4; c++) {
+         glsl_to_tgsi_instruction *inst = writes[4 * r + c];
+         if (inst)
+            inst->dead_mask |= (1 << c);
+      }
+   }
+
    /* Now actually remove the instructions that are completely dead and update
     * the writemask of other instructions with dead channels.
     */

From f00406b68c07f97b11e873c04917cafdb1a67462 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Mon, 27 Jun 2011 17:11:07 -0500
Subject: [PATCH 167/600] glsl_to_tgsi: improve assignment handling

This is a hack, but it's better than emitting an unnecessary MOV instruction
and hoping the optimization passes clean it up.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 15a1a3c51c4..e38617ae9fe 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -695,13 +695,13 @@ glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
    st_src_reg tmp = get_temp(glsl_type::float_type);
 
    if (src0.type == GLSL_TYPE_INT)
-      emit(ir, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0);
+      emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0);
    else if (src0.type == GLSL_TYPE_UINT)
-      emit(ir, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0);
+      emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0);
    else
       tmp = src0;
    
-   emit(ir, TGSI_OPCODE_ARL, dst, tmp);
+   emit(NULL, TGSI_OPCODE_ARL, dst, tmp);
 }
 
 /**
@@ -1902,6 +1902,17 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
          l.index++;
          r.index++;
       }
+   } else if (ir->rhs->as_expression() &&
+              this->instructions.get_tail() &&
+              ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
+              type_size(ir->lhs->type) == 1) {
+      /* To avoid emitting an extra MOV when assigning an expression to a 
+       * variable, change the destination register of the last instruction 
+       * emitted as part of the expression to the assignment variable.
+       */
+      glsl_to_tgsi_instruction *inst;
+      inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+      inst->dst = l;
    } else {
       for (i = 0; i < type_size(ir->lhs->type); i++) {
          emit(ir, TGSI_OPCODE_MOV, l, r);

From 4c8b6a286887628e5fc35306189a4c4a83c482ea Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Mon, 27 Jun 2011 17:25:50 -0500
Subject: [PATCH 168/600] glsl_to_tgsi: fix mistake in new dead code
 elimination pass

The conditions of IF opcodes were not being counted as reads, which sometimes
led to the condition register being wrong or undefined.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index e38617ae9fe..f87c64f62c7 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3315,10 +3315,6 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
          memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
          break;
 
-      case TGSI_OPCODE_IF:
-         ++level;
-         break;
-
       case TGSI_OPCODE_ENDIF:
          --level;
          break;
@@ -3341,6 +3337,10 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
          }
          break;
 
+      case TGSI_OPCODE_IF:
+         ++level;
+         /* fallthrough to default case to mark the condition as read */
+      
       default:
          /* Continuing the block, clear any channels from the write array that
           * are read by this instruction.

From 9c2810103d107d1e5ef8bd8b57819d12264f664a Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Mon, 27 Jun 2011 17:40:10 -0500
Subject: [PATCH 169/600] glsl_to_tgsi: always run copy_propagate() and
 eliminate_dead_code_advanced()

These two passes are written to handle indirect addressing properly.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index f87c64f62c7..e7d0af83a6b 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -4422,18 +4422,17 @@ get_mesa_program(struct gl_context *ctx,
    if (target == GL_VERTEX_PROGRAM_ARB)
       v->remove_output_reads(PROGRAM_VARYING);
    
-   /* Perform the simplify_cmp optimization, which is required by r300g. */
+   /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
    v->simplify_cmp();
+   v->copy_propagate();
+   while (v->eliminate_dead_code_advanced());
 
-   /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor.
-    * FIXME: These passes to optimize temporary registers don't work when there
+   /* FIXME: These passes to optimize temporary registers don't work when there
     * is indirect addressing of the temporary register space.  We need proper 
     * array support so that we don't have to give up these passes in every 
     * shader that uses arrays.
     */
    if (!v->indirect_addr_temps) {
-      v->copy_propagate();
-      while (v->eliminate_dead_code_advanced());
       v->eliminate_dead_code();
       v->merge_registers();
       v->renumber_registers();

From 54db6e618e43abbd69b59e0a03e2b6ec83d3120f Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Thu, 30 Jun 2011 13:42:37 -0500
Subject: [PATCH 170/600] r200, r600c, i965: fix build

---
 src/mesa/drivers/dri/i965/brw_fs.cpp           |  2 +-
 src/mesa/drivers/dri/i965/brw_vs_emit.c        |  2 +-
 src/mesa/drivers/dri/r200/r200_vertprog.c      |  8 ++++----
 src/mesa/drivers/dri/r600/evergreen_fragprog.c |  8 ++++----
 src/mesa/drivers/dri/r600/evergreen_vertprog.c | 16 ++++++++--------
 src/mesa/drivers/dri/r600/r700_fragprog.c      |  8 ++++----
 src/mesa/drivers/dri/r600/r700_vertprog.c      | 16 ++++++++--------
 7 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 7c73a8fbf02..31f76f8c939 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -605,7 +605,7 @@ fs_visitor::setup_paramvalues_refs()
    /* Set up the pointers to ParamValues now that that array is finalized. */
    for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
       c->prog_data.param[i] =
-	 fp->Base.Parameters->ParameterValues[this->param_index[i]] +
+	 (const float *)fp->Base.Parameters->ParameterValues[this->param_index[i]] +
 	 this->param_offset[i];
    }
 }
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index b6c9e5a1ceb..2fa04a15a34 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1359,7 +1359,7 @@ get_src_reg( struct brw_vs_compile *c,
 
 	 if (component >= 0) {
 	    params = c->vp->program.Base.Parameters;
-	    f = params->ParameterValues[src->Index][component];
+	    f = params->ParameterValues[src->Index][component].f;
 
 	    if (src->Abs)
 	       f = fabs(f);
diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c
index 63e03b0e0c7..cf44d7f459c 100644
--- a/src/mesa/drivers/dri/r200/r200_vertprog.c
+++ b/src/mesa/drivers/dri/r200/r200_vertprog.c
@@ -126,10 +126,10 @@ static GLboolean r200VertexProgUpdateParams(struct gl_context *ctx, struct r200_
       case PROGRAM_NAMED_PARAM:
       //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
       case PROGRAM_CONSTANT:
-	 *fcmd++ = paramList->ParameterValues[pi][0];
-	 *fcmd++ = paramList->ParameterValues[pi][1];
-	 *fcmd++ = paramList->ParameterValues[pi][2];
-	 *fcmd++ = paramList->ParameterValues[pi][3];
+	 *fcmd++ = paramList->ParameterValues[pi][0].f;
+	 *fcmd++ = paramList->ParameterValues[pi][1].f;
+	 *fcmd++ = paramList->ParameterValues[pi][2].f;
+	 *fcmd++ = paramList->ParameterValues[pi][3].f;
 	 break;
       default:
 	 _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
diff --git a/src/mesa/drivers/dri/r600/evergreen_fragprog.c b/src/mesa/drivers/dri/r600/evergreen_fragprog.c
index e527c379b62..cc584ca2b35 100644
--- a/src/mesa/drivers/dri/r600/evergreen_fragprog.c
+++ b/src/mesa/drivers/dri/r600/evergreen_fragprog.c
@@ -752,10 +752,10 @@ GLboolean evergreenSetupFPconstants(struct gl_context * ctx)
 	    unNumParamData = paramList->NumParameters;
 
 	    for(ui=0; ui<unNumParamData; ui++) {
-		        evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
-		        evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
-		        evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
-		        evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+		        evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f;
+		        evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f;
+		        evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f;
+		        evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f;
 	    }
 
 	    /* alloc multiple of 16 constants */
diff --git a/src/mesa/drivers/dri/r600/evergreen_vertprog.c b/src/mesa/drivers/dri/r600/evergreen_vertprog.c
index 018869b9996..117916ac78f 100644
--- a/src/mesa/drivers/dri/r600/evergreen_vertprog.c
+++ b/src/mesa/drivers/dri/r600/evergreen_vertprog.c
@@ -684,17 +684,17 @@ GLboolean evergreenSetupVPconstants(struct gl_context * ctx)
 	    for(ui=0; ui<unNumParamData; ui++) {
             if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM) 
             {
-                evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0];
-		        evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1];
-		        evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2];
-		        evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3];
+                evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f;
+		        evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f;
+		        evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f;
+		        evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f;
             }
             else
             {
-		        evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
-		        evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
-		        evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
-		        evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+		        evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f;
+		        evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f;
+		        evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f;
+		        evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f;
             }
 	    }
 
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
index 40494cd6af0..6f9834e68fe 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.c
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.c
@@ -778,10 +778,10 @@ GLboolean r700SetupFragmentProgram(struct gl_context * ctx)
 	    unNumParamData = paramList->NumParameters;
 
 	    for(ui=0; ui<unNumParamData; ui++) {
-		        r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
-		        r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
-		        r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
-		        r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+		        r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f;
+		        r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f;
+		        r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f;
+		        r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f;
 	    }
 
         /* Load fp constants to gpu */
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
index 7d4be9180a0..b1e2742b27d 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.c
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
@@ -720,17 +720,17 @@ GLboolean r700SetupVertexProgram(struct gl_context * ctx)
 	    for(ui=0; ui<unNumParamData; ui++) {
             if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM) 
             {
-                r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0];
-		        r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1];
-		        r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2];
-		        r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3];
+              r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f;
+		        r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f;
+		        r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f;
+		        r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f;
             }
             else
             {
-		        r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
-		        r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
-		        r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
-		        r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+		        r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f;
+		        r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f;
+		        r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f;
+		        r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f;
             }
 	    }
 

From 33e0c47b05c8fbae9d7af57ba65b612825b5db60 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Mon, 4 Jul 2011 08:44:12 -0500
Subject: [PATCH 171/600] glsl_to_tgsi: replace MAX_PROGRAM_TEMPS (256) with
 MAX_TEMPS (4096)

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index e7d0af83a6b..d7afc22c048 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -77,6 +77,8 @@ extern "C" {
                            (1 << PROGRAM_CONSTANT) |     \
                            (1 << PROGRAM_UNIFORM))
 
+#define MAX_TEMPS         4096
+
 class st_src_reg;
 class st_dst_reg;
 
@@ -2751,11 +2753,11 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
    GLint outputMap[VERT_RESULT_MAX];
    GLint outputTypes[VERT_RESULT_MAX];
    GLuint numVaryingReads = 0;
-   GLboolean usedTemps[MAX_PROGRAM_TEMPS];
+   GLboolean usedTemps[MAX_TEMPS];
    GLuint firstTemp = 0;
 
    _mesa_find_used_registers(prog, PROGRAM_TEMPORARY,
-                             usedTemps, MAX_PROGRAM_TEMPS);
+                             usedTemps, MAX_TEMPS);
 
    assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT);
    assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING);
@@ -2775,7 +2777,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
             if (outputMap[var] == -1) {
                numVaryingReads++;
                outputMap[var] = _mesa_find_free_register(usedTemps,
-                                                         MAX_PROGRAM_TEMPS,
+                                                         MAX_TEMPS,
                                                          firstTemp);
                outputTypes[var] = inst->src[j].type;
                firstTemp = outputMap[var] + 1;
@@ -2857,7 +2859,7 @@ get_src_arg_mask(st_dst_reg dst, st_src_reg src)
 void
 glsl_to_tgsi_visitor::simplify_cmp(void)
 {
-   unsigned tempWrites[MAX_PROGRAM_TEMPS];
+   unsigned tempWrites[MAX_TEMPS];
    unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
 
    memset(tempWrites, 0, sizeof(tempWrites));
@@ -2883,7 +2885,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
          prevWriteMask = outputWrites[inst->dst.index];
          outputWrites[inst->dst.index] |= inst->dst.writemask;
       } else if (inst->dst.file == PROGRAM_TEMPORARY) {
-         assert(inst->dst.index < MAX_PROGRAM_TEMPS);
+         assert(inst->dst.index < MAX_TEMPS);
          prevWriteMask = tempWrites[inst->dst.index];
          tempWrites[inst->dst.index] |= inst->dst.writemask;
       }
@@ -3504,7 +3506,7 @@ struct label {
 struct st_translate {
    struct ureg_program *ureg;
 
-   struct ureg_dst temps[MAX_PROGRAM_TEMPS];
+   struct ureg_dst temps[MAX_TEMPS];
    struct ureg_src *constants;
    struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
    struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];

From c0dcab2882a4731dccd363a40c3ebcabc88b9c5d Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Fri, 8 Jul 2011 21:12:08 -0500
Subject: [PATCH 172/600] st/mesa, glsl_to_tgsi: support
 glDrawPixels/glCopyPixels with a GLSL fragment shader active

Since this was previously implemented using Mesa IR and _mesa_combine_programs,
this commit adds a new code path that works with glsl_to_tgsi.
---
 src/mesa/state_tracker/st_cb_drawpixels.c  |  65 +++++++++++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 126 +++++++++++++++++++++
 src/mesa/state_tracker/st_glsl_to_tgsi.h   |   3 +
 3 files changed, 194 insertions(+)

diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index 965fbcd1d9e..f4dd2a42847 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -94,6 +94,67 @@ is_passthrough_program(const struct gl_fragment_program *prog)
 }
 
 
+/* XXX copied verbatim from st_atom_pixeltransfer.c */
+static struct pipe_resource *
+create_color_map_texture(struct gl_context *ctx)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_resource *pt;
+   enum pipe_format format;
+   const uint texSize = 256; /* simple, and usually perfect */
+
+   /* find an RGBA texture format */
+   format = st_choose_format(pipe->screen, GL_RGBA,
+                             PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW);
+
+   /* create texture for color map/table */
+   pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0,
+                          texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW);
+   return pt;
+}
+
+
+/**
+ * Returns a fragment program which implements the current pixel transfer ops.
+ */
+static struct gl_fragment_program *
+get_glsl_pixel_transfer_program(struct st_context *st,
+                                struct st_fragment_program *orig)
+{
+   int pixelMaps = 0, scaleAndBias = 0;
+   struct gl_context *ctx = st->ctx;
+   struct st_fragment_program *fp = (struct st_fragment_program *)
+      ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
+
+   if (!fp)
+      return NULL;
+
+   if (ctx->Pixel.RedBias != 0.0 || ctx->Pixel.RedScale != 1.0 ||
+       ctx->Pixel.GreenBias != 0.0 || ctx->Pixel.GreenScale != 1.0 ||
+       ctx->Pixel.BlueBias != 0.0 || ctx->Pixel.BlueScale != 1.0 ||
+       ctx->Pixel.AlphaBias != 0.0 || ctx->Pixel.AlphaScale != 1.0) {
+      scaleAndBias = 1;
+   }
+
+   pixelMaps = ctx->Pixel.MapColorFlag;
+
+   if (pixelMaps) {
+      /* create the colormap/texture now if not already done */
+      if (!st->pixel_xfer.pixelmap_texture) {
+         st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx);
+         st->pixel_xfer.pixelmap_sampler_view =
+            st_create_texture_sampler_view(st->pipe,
+                                           st->pixel_xfer.pixelmap_texture);
+      }
+   }
+
+   get_pixel_transfer_visitor(fp, orig->glsl_to_tgsi,
+                              scaleAndBias, pixelMaps);
+
+   return &fp->Base;
+}
+
 
 /**
  * Make fragment shader for glDraw/CopyPixels.  This shader is made
@@ -107,11 +168,15 @@ st_make_drawpix_fragment_program(struct st_context *st,
                                  struct gl_fragment_program **fpOut)
 {
    struct gl_program *newProg;
+   struct st_fragment_program *stfp = (struct st_fragment_program *) fpIn;
 
    if (is_passthrough_program(fpIn)) {
       newProg = (struct gl_program *) _mesa_clone_fragment_program(st->ctx,
                                              &st->pixel_xfer.program->Base);
    }
+   else if (stfp->glsl_to_tgsi != NULL) {
+      newProg = (struct gl_program *) get_glsl_pixel_transfer_program(st, stfp);
+   }
    else {
 #if 0
       /* debug */
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index d7afc22c048..ae0c92f5f13 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3494,6 +3494,132 @@ glsl_to_tgsi_visitor::renumber_registers(void)
    this->next_temp = new_index;
 }
 
+/**
+ * Returns a fragment program which implements the current pixel transfer ops.
+ * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
+ */
+extern "C" void
+get_pixel_transfer_visitor(struct st_fragment_program *fp,
+                           glsl_to_tgsi_visitor *original,
+                           int scale_and_bias, int pixel_maps)
+{
+   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
+   struct st_context *st = st_context(original->ctx);
+   struct gl_program *prog = &fp->Base.Base;
+   struct gl_program_parameter_list *params = _mesa_new_parameter_list();
+   st_src_reg coord, src0;
+   st_dst_reg dst0;
+   glsl_to_tgsi_instruction *inst;
+
+   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
+   v->ctx = original->ctx;
+   v->prog = prog;
+   v->glsl_version = original->glsl_version;
+   v->options = original->options;
+   v->next_temp = original->next_temp;
+   v->num_address_regs = original->num_address_regs;
+   v->samplers_used = prog->SamplersUsed = original->samplers_used;
+   v->indirect_addr_temps = original->indirect_addr_temps;
+   v->indirect_addr_consts = original->indirect_addr_consts;
+
+   /*
+    * Get initial pixel color from the texture.
+    * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
+    */
+   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
+   src0 = v->get_temp(glsl_type::vec4_type);
+   dst0 = st_dst_reg(src0);
+   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
+   inst->sampler = 0;
+   inst->tex_target = TEXTURE_2D_INDEX;
+
+   prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
+   prog->OutputsWritten |= BITFIELD64_BIT(FRAG_RESULT_COLOR);
+   prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
+   v->samplers_used |= (1 << 0);
+
+   if (scale_and_bias) {
+      static const gl_state_index scale_state[STATE_LENGTH] =
+         { STATE_INTERNAL, STATE_PT_SCALE,
+           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
+      static const gl_state_index bias_state[STATE_LENGTH] =
+         { STATE_INTERNAL, STATE_PT_BIAS,
+           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
+      GLint scale_p, bias_p;
+      st_src_reg scale, bias;
+
+      scale_p = _mesa_add_state_reference(params, scale_state);
+      bias_p = _mesa_add_state_reference(params, bias_state);
+
+      /* MAD colorTemp, colorTemp, scale, bias; */
+      scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
+      bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
+      inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
+   }
+
+   if (pixel_maps) {
+      st_src_reg temp = v->get_temp(glsl_type::vec4_type);
+      st_dst_reg temp_dst = st_dst_reg(temp);
+
+      assert(st->pixel_xfer.pixelmap_texture);
+
+      /* With a little effort, we can do four pixel map look-ups with
+       * two TEX instructions:
+       */
+
+      /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
+      temp_dst.writemask = WRITEMASK_XY; /* write R,G */
+      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
+      inst->sampler = 1;
+      inst->tex_target = TEXTURE_2D_INDEX;
+
+      /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
+      src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
+      temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
+      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
+      inst->sampler = 1;
+      inst->tex_target = TEXTURE_2D_INDEX;
+
+      prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
+      v->samplers_used |= (1 << 1);
+
+      /* MOV colorTemp, temp; */
+      inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp);
+   }
+
+   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
+    * new visitor. */
+   foreach_iter(exec_list_iterator, iter, original->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      st_src_reg src_regs[3];
+
+      for (int i=0; i<3; i++) {
+         src_regs[i] = inst->src[i];
+         if (src_regs[i].file == PROGRAM_INPUT &&
+             src_regs[i].index == FRAG_ATTRIB_COL0)
+         {
+            src_regs[i].file = PROGRAM_TEMPORARY;
+            src_regs[i].index = src0.index;
+         }
+         else if (src_regs[i].file == PROGRAM_INPUT)
+            prog->InputsRead |= (1 << src_regs[i].index);
+         else if (src_regs[i].file == PROGRAM_OUTPUT)
+            prog->OutputsWritten |= BITFIELD64_BIT(src_regs[i].index);
+      }
+
+      v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
+   }
+
+   /* Make modifications to fragment program info. */
+   prog->Parameters = _mesa_combine_parameter_lists(params,
+                                                    original->prog->Parameters);
+   prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes);
+   prog->Varying = _mesa_clone_parameter_list(original->prog->Varying);
+   _mesa_free_parameter_list(params);
+   count_resources(v, prog);
+   fp->glsl_to_tgsi = v;
+}
+
 /* ------------------------- TGSI conversion stuff -------------------------- */
 struct label {
    unsigned branch_target;
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h
index e21c0d1e0af..7884a9feb71 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.h
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h
@@ -52,6 +52,9 @@ enum pipe_error st_translate_program(
    boolean passthrough_edgeflags);
 
 void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v);
+void get_pixel_transfer_visitor(struct st_fragment_program *fp,
+                                struct glsl_to_tgsi_visitor *original,
+                                int scale_and_bias, int pixel_maps);
 
 struct gl_shader *st_new_shader(struct gl_context *ctx, GLuint name, GLuint type);
 

From 5f0b4b0e9d376f9ec1cb5ae08c36052f4f51ac37 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sun, 10 Jul 2011 17:17:38 -0500
Subject: [PATCH 173/600] st/mesa, glsl_to_tgsi: support glBitmap with a GLSL
 fragment shader active

---
 src/mesa/state_tracker/st_cb_bitmap.c      | 35 +++++++++--
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 73 ++++++++++++++++++++++
 src/mesa/state_tracker/st_glsl_to_tgsi.h   |  3 +
 3 files changed, 105 insertions(+), 6 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index 49b196032b9..f0750b518ad 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -172,6 +172,23 @@ make_bitmap_fragment_program(struct gl_context *ctx, GLuint samplerIndex)
 }
 
 
+static struct gl_program *
+make_bitmap_fragment_program_glsl(struct st_context *st,
+                                  struct st_fragment_program *orig,
+                                  GLuint samplerIndex)
+{
+   struct gl_context *ctx = st->ctx;
+   struct st_fragment_program *fp = (struct st_fragment_program *)
+      ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
+
+   if (!fp)
+      return NULL;
+   
+   get_bitmap_visitor(fp, orig->glsl_to_tgsi, samplerIndex);
+   return &fp->Base.Base;
+}
+
+
 static int
 find_free_bit(uint bitfield)
 {
@@ -199,6 +216,7 @@ st_make_bitmap_fragment_program(struct st_context *st,
                                 GLuint *bitmap_sampler)
 {
    struct st_fragment_program *bitmap_prog;
+   struct st_fragment_program *stfpIn = (struct st_fragment_program *) fpIn;
    struct gl_program *newProg;
    uint sampler;
 
@@ -207,13 +225,18 @@ st_make_bitmap_fragment_program(struct st_context *st,
     * with the bitmap sampler/kill instructions.
     */
    sampler = find_free_bit(fpIn->Base.SamplersUsed);
-   bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler);
+   
+   if (stfpIn->glsl_to_tgsi)
+      newProg = make_bitmap_fragment_program_glsl(st, stfpIn, sampler);
+   else {
+      bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler);
 
-   newProg = _mesa_combine_programs(st->ctx,
-                                    &bitmap_prog->Base.Base,
-                                    &fpIn->Base);
-   /* done with this after combining */
-   st_reference_fragprog(st, &bitmap_prog, NULL);
+      newProg = _mesa_combine_programs(st->ctx,
+                                       &bitmap_prog->Base.Base,
+                                       &fpIn->Base);
+      /* done with this after combining */
+      st_reference_fragprog(st, &bitmap_prog, NULL);
+   }
 
 #if 0
    {
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index ae0c92f5f13..74f15087947 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3620,6 +3620,79 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
    fp->glsl_to_tgsi = v;
 }
 
+/**
+ * Make fragment program for glBitmap:
+ *   Sample the texture and kill the fragment if the bit is 0.
+ * This program will be combined with the user's fragment program.
+ *
+ * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
+ */
+extern "C" void
+get_bitmap_visitor(struct st_fragment_program *fp,
+                   glsl_to_tgsi_visitor *original, int samplerIndex)
+{
+   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
+   struct st_context *st = st_context(original->ctx);
+   struct gl_program *prog = &fp->Base.Base;
+   st_src_reg coord, src0;
+   st_dst_reg dst0;
+   glsl_to_tgsi_instruction *inst;
+
+   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
+   v->ctx = original->ctx;
+   v->prog = prog;
+   v->glsl_version = original->glsl_version;
+   v->options = original->options;
+   v->next_temp = original->next_temp;
+   v->num_address_regs = original->num_address_regs;
+   v->samplers_used = prog->SamplersUsed = original->samplers_used;
+   v->indirect_addr_temps = original->indirect_addr_temps;
+   v->indirect_addr_consts = original->indirect_addr_consts;
+
+   /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
+   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
+   src0 = v->get_temp(glsl_type::vec4_type);
+   dst0 = st_dst_reg(src0);
+   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
+   inst->sampler = samplerIndex;
+   inst->tex_target = TEXTURE_2D_INDEX;
+
+   prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
+   prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
+   v->samplers_used |= (1 << samplerIndex);
+
+   /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */
+   src0.negate = NEGATE_XYZW;
+   if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
+      src0.swizzle = SWIZZLE_XXXX;
+   inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0);
+
+   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
+    * new visitor. */
+   foreach_iter(exec_list_iterator, iter, original->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      st_src_reg src_regs[3];
+
+      if (inst->dst.file == PROGRAM_OUTPUT)
+         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
+
+      for (int i=0; i<3; i++) {
+         src_regs[i] = inst->src[i];
+         if (src_regs[i].file == PROGRAM_INPUT)
+            prog->InputsRead |= (1 << src_regs[i].index);
+      }
+
+      v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
+   }
+
+   /* Make modifications to fragment program info. */
+   prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
+   prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes);
+   prog->Varying = _mesa_clone_parameter_list(original->prog->Varying);
+   count_resources(v, prog);
+   fp->glsl_to_tgsi = v;
+}
+
 /* ------------------------- TGSI conversion stuff -------------------------- */
 struct label {
    unsigned branch_target;
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h
index 7884a9feb71..d877471785d 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.h
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h
@@ -55,6 +55,9 @@ void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v);
 void get_pixel_transfer_visitor(struct st_fragment_program *fp,
                                 struct glsl_to_tgsi_visitor *original,
                                 int scale_and_bias, int pixel_maps);
+void get_bitmap_visitor(struct st_fragment_program *fp,
+                        struct glsl_to_tgsi_visitor *original,
+                        int samplerIndex);
 
 struct gl_shader *st_new_shader(struct gl_context *ctx, GLuint name, GLuint type);
 

From 87f8d8547db9b947ae847c509a464e06d0ac6c64 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sun, 10 Jul 2011 17:36:04 -0500
Subject: [PATCH 174/600] glsl_to_tgsi: fix mistakes in
 get_pixel_transfer_visitor()

I noticed these issues while working on get_bitmap_visitor().
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 74f15087947..3df22eae918 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3534,7 +3534,6 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
    inst->tex_target = TEXTURE_2D_INDEX;
 
    prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
-   prog->OutputsWritten |= BITFIELD64_BIT(FRAG_RESULT_COLOR);
    prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
    v->samplers_used |= (1 << 0);
 
@@ -3593,6 +3592,9 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
       st_src_reg src_regs[3];
 
+      if (inst->dst.file == PROGRAM_OUTPUT)
+         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
+
       for (int i=0; i<3; i++) {
          src_regs[i] = inst->src[i];
          if (src_regs[i].file == PROGRAM_INPUT &&
@@ -3603,8 +3605,6 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
          }
          else if (src_regs[i].file == PROGRAM_INPUT)
             prog->InputsRead |= (1 << src_regs[i].index);
-         else if (src_regs[i].file == PROGRAM_OUTPUT)
-            prog->OutputsWritten |= BITFIELD64_BIT(src_regs[i].index);
       }
 
       v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);

From 7732822c833ee22e259af3f8bd2bfb57c986612e Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Thu, 21 Jul 2011 15:49:26 -0500
Subject: [PATCH 175/600] glsl_to_tgsi: separate immediates from array
 constants during IR translation

Before, if any uniform or constant array was accessed with indirect
addressing, st_translate_program() would emit uniform constants in the place
of immediates.  This behavior was unavoidable with ir_to_mesa/mesa_to_tgsi, but
glsl_to_tgsi can work around it since the GLSL IR backend and the TGSI
emission are both inside the state tracker.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 143 ++++++++++++++-------
 1 file changed, 95 insertions(+), 48 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 3df22eae918..389e5d8e2ef 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -70,6 +70,7 @@ extern "C" {
 #include "st_mesa_to_tgsi.h"
 }
 
+#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
 #define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) |  \
                            (1 << PROGRAM_ENV_PARAM) |    \
                            (1 << PROGRAM_STATE_VAR) |    \
@@ -272,6 +273,7 @@ public:
    struct gl_program *prog;
    struct gl_shader_program *shader_program;
    struct gl_shader_compiler_options *options;
+   struct gl_program_parameter_list *immediates;
 
    int next_temp;
 
@@ -505,6 +507,9 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
       case PROGRAM_UNIFORM:
          this->indirect_addr_consts = true;
          break;
+      case PROGRAM_IMMEDIATE:
+         assert(!"immediates should not have indirect addressing");
+         break;
       default:
          break;
       }
@@ -524,6 +529,9 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
             case PROGRAM_UNIFORM:
                this->indirect_addr_consts = true;
                break;
+            case PROGRAM_IMMEDIATE:
+               assert(!"immediates should not have indirect addressing");
+               break;
             default:
                break;
             }
@@ -804,12 +812,12 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
 struct st_src_reg
 glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
 {
-   st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_FLOAT);
+   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
    union gl_constant_value uval;
 
    uval.f = val;
-   src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
-        				  &uval, 1, GL_FLOAT, &src.swizzle);
+   src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1,
+                                                GL_FLOAT, &src.swizzle);
 
    return src;
 }
@@ -817,14 +825,14 @@ glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
 struct st_src_reg
 glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
 {
-   st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_INT);
+   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
    union gl_constant_value uval;
    
    assert(glsl_version >= 130);
 
    uval.i = val;
-   src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
-        				  &uval, 1, GL_INT, &src.swizzle);
+   src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1,
+                                                GL_INT, &src.swizzle);
 
    return src;
 }
@@ -1933,9 +1941,15 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
    gl_constant_value *values = (gl_constant_value *) stack_vals;
    GLenum gl_type = GL_NONE;
    unsigned int i;
+   gl_register_file file;
+   gl_program_parameter_list *param_list;
+   static int in_array = 0;
+
+   file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
+   param_list = in_array ? this->prog->Parameters : this->immediates;
 
    /* Unfortunately, 4 floats is all we can get into
-    * _mesa_add_unnamed_constant.  So, make a temp to store an
+    * _mesa_add_typed_unnamed_constant.  So, make a temp to store an
     * aggregate constant and move each constant value into it.  If we
     * get lucky, copy propagation will eliminate the extra moves.
     */
@@ -1969,6 +1983,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
       int size = type_size(ir->type->fields.array);
 
       assert(size > 0);
+      in_array++;
 
       for (i = 0; i < ir->type->length; i++) {
          ir->array_elements[i]->accept(this);
@@ -1981,6 +1996,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
          }
       }
       this->result = temp_base;
+      in_array--;
       return;
    }
 
@@ -1992,8 +2008,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
          assert(ir->type->base_type == GLSL_TYPE_FLOAT);
          values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
 
-         src = st_src_reg(PROGRAM_CONSTANT, -1, ir->type->base_type);
-         src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
+         src = st_src_reg(file, -1, ir->type->base_type);
+         src.index = _mesa_add_typed_unnamed_constant(param_list,
                                                       values,
                                                       ir->type->vector_elements,
                                                       GL_FLOAT,
@@ -2007,7 +2023,6 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
       return;
    }
 
-   src.file = PROGRAM_CONSTANT;
    switch (ir->type->base_type) {
    case GLSL_TYPE_FLOAT:
       gl_type = GL_FLOAT;
@@ -2046,8 +2061,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
       assert(!"Non-float/uint/int/bool constant");
    }
 
-   this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type);
-   this->result.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
+   this->result = st_src_reg(file, -1, ir->type);
+   this->result.index = _mesa_add_typed_unnamed_constant(param_list,
         					   values, ir->type->vector_elements, gl_type,
         					   &this->result.swizzle);
 }
@@ -2430,11 +2445,13 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
    num_address_regs = 0;
    indirect_addr_temps = false;
    indirect_addr_consts = false;
+   immediates = _mesa_new_parameter_list();
    mem_ctx = ralloc_context(NULL);
 }
 
 glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
 {
+   _mesa_free_parameter_list(immediates);
    ralloc_free(mem_ctx);
 }
 
@@ -3521,6 +3538,8 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
    v->samplers_used = prog->SamplersUsed = original->samplers_used;
    v->indirect_addr_temps = original->indirect_addr_temps;
    v->indirect_addr_consts = original->indirect_addr_consts;
+   _mesa_free_parameter_list(v->immediates);
+   v->immediates = _mesa_clone_parameter_list(original->immediates);
 
    /*
     * Get initial pixel color from the texture.
@@ -3648,6 +3667,8 @@ get_bitmap_visitor(struct st_fragment_program *fp,
    v->samplers_used = prog->SamplersUsed = original->samplers_used;
    v->indirect_addr_temps = original->indirect_addr_temps;
    v->indirect_addr_consts = original->indirect_addr_consts;
+   _mesa_free_parameter_list(v->immediates);
+   v->immediates = _mesa_clone_parameter_list(original->immediates);
 
    /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
    coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
@@ -3707,6 +3728,7 @@ struct st_translate {
 
    struct ureg_dst temps[MAX_TEMPS];
    struct ureg_src *constants;
+   struct ureg_src *immediates;
    struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
    struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
    struct ureg_dst address[1];
@@ -3797,6 +3819,43 @@ static void set_insn_start( struct st_translate *t,
    t->insn[t->insn_count++] = start;
 }
 
+/**
+ * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
+ */
+static struct ureg_src
+emit_immediate( struct st_translate *t,
+                struct gl_program_parameter_list *params,
+                int index)
+{
+   struct ureg_program *ureg = t->ureg;
+
+   switch(params->Parameters[index].DataType)
+   {
+   case GL_FLOAT:
+   case GL_FLOAT_VEC2:
+   case GL_FLOAT_VEC3:
+   case GL_FLOAT_VEC4:
+      return ureg_DECL_immediate(ureg, (float *)params->ParameterValues[index], 4);
+   case GL_INT:
+   case GL_INT_VEC2:
+   case GL_INT_VEC3:
+   case GL_INT_VEC4:
+      return ureg_DECL_immediate_int(ureg, (int *)params->ParameterValues[index], 4);
+   case GL_UNSIGNED_INT:
+   case GL_UNSIGNED_INT_VEC2:
+   case GL_UNSIGNED_INT_VEC3:
+   case GL_UNSIGNED_INT_VEC4:
+   case GL_BOOL:
+   case GL_BOOL_VEC2:
+   case GL_BOOL_VEC3:
+   case GL_BOOL_VEC4:
+      return ureg_DECL_immediate_uint(ureg, (unsigned *)params->ParameterValues[index], 4);
+   default:
+      assert(!"should not get here - type must be float, int, uint, or bool");
+      return ureg_src_undef();
+   }
+}
+
 /**
  * Map a Mesa dst register to a TGSI ureg_dst register.
  */
@@ -3871,6 +3930,9 @@ src_register( struct st_translate *t,
       else
          return t->constants[index];
 
+   case PROGRAM_IMMEDIATE:
+      return t->immediates[index];
+
    case PROGRAM_INPUT:
       assert(t->inputMapping[index] < Elements(t->inputs));
       return t->inputs[t->inputMapping[index]];
@@ -4402,9 +4464,8 @@ st_translate_program(
       }
    }
 
-   /* Emit constants and immediates.  Mesa uses a single index space
-    * for these, so we put all the translated regs in t->constants.
-    * XXX: this entire if block depends on proginfo->Parameters from Mesa IR
+   /* Emit constants and uniforms.  TGSI uses a single index space for these, 
+    * so we put all the translated regs in t->constants.
     */
    if (proginfo->Parameters) {
       t->constants = (struct ureg_src *)CALLOC( proginfo->Parameters->NumParameters * sizeof t->constants[0] );
@@ -4423,49 +4484,34 @@ st_translate_program(
             t->constants[i] = ureg_DECL_constant( ureg, i );
             break;
 
-            /* Emit immediates only when there's no indirect addressing of
-             * the const buffer.
-             * FIXME: Be smarter and recognize param arrays:
-             * indirect addressing is only valid within the referenced
-             * array.
-             */
+         /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
+          * addressing of the const buffer.
+          * FIXME: Be smarter and recognize param arrays:
+          * indirect addressing is only valid within the referenced
+          * array.
+          */
          case PROGRAM_CONSTANT:
             if (program->indirect_addr_consts)
                t->constants[i] = ureg_DECL_constant( ureg, i );
             else
-               switch(proginfo->Parameters->Parameters[i].DataType)
-               {
-               case GL_FLOAT:
-               case GL_FLOAT_VEC2:
-               case GL_FLOAT_VEC3:
-               case GL_FLOAT_VEC4:
-                  t->constants[i] = ureg_DECL_immediate(ureg, (float *)proginfo->Parameters->ParameterValues[i], 4);
-                  break;
-               case GL_INT:
-               case GL_INT_VEC2:
-               case GL_INT_VEC3:
-               case GL_INT_VEC4:
-                  t->constants[i] = ureg_DECL_immediate_int(ureg, (int *)proginfo->Parameters->ParameterValues[i], 4);
-                  break;
-               case GL_UNSIGNED_INT:
-               case GL_UNSIGNED_INT_VEC2:
-               case GL_UNSIGNED_INT_VEC3:
-               case GL_UNSIGNED_INT_VEC4:
-               case GL_BOOL:
-               case GL_BOOL_VEC2:
-               case GL_BOOL_VEC3:
-               case GL_BOOL_VEC4:
-                  t->constants[i] = ureg_DECL_immediate_uint(ureg, (unsigned *)proginfo->Parameters->ParameterValues[i], 4);
-                  break;
-               default:
-                  assert(!"should not get here");
-               }
+               t->constants[i] = emit_immediate( t, proginfo->Parameters, i );
             break;
          default:
             break;
          }
       }
    }
+   
+   /* Emit immediate values.
+    */
+   t->immediates = (struct ureg_src *)CALLOC( program->immediates->NumParameters * sizeof(struct ureg_src) );
+   if (t->immediates == NULL) {
+      ret = PIPE_ERROR_OUT_OF_MEMORY;
+      goto out;
+   }
+   for (i = 0; i < program->immediates->NumParameters; i++) {
+      t->immediates[i] = emit_immediate( t, program->immediates, i );
+   }
 
    /* texture samplers */
    for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
@@ -4512,6 +4558,7 @@ out:
    FREE(t->insn);
    FREE(t->labels);
    FREE(t->constants);
+   FREE(t->immediates);
 
    if (t->error) {
       debug_printf("%s: translate error flag set\n", __FUNCTION__);

From 0da994a9f15b461d16cf88ce16dc07e98dfada6f Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Thu, 21 Jul 2011 16:29:56 -0500
Subject: [PATCH 176/600] glsl_to_tgsi: make assignment hack safer

Fixes an assertion failure in piglit test glsl-texcoord-array.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 389e5d8e2ef..6e01a44a733 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1917,12 +1917,13 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
               ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
               type_size(ir->lhs->type) == 1) {
       /* To avoid emitting an extra MOV when assigning an expression to a 
-       * variable, change the destination register of the last instruction 
-       * emitted as part of the expression to the assignment variable.
+       * variable, emit the last instruction of the expression again, but
+       * replace the destination register with the target of the assignment.
+       * Dead code elimination will remove the original instruction.
        */
       glsl_to_tgsi_instruction *inst;
       inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
-      inst->dst = l;
+      emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
    } else {
       for (i = 0; i < type_size(ir->lhs->type); i++) {
          emit(ir, TGSI_OPCODE_MOV, l, r);

From a2c3b9f38d81f363bd62abc87dc3abef2beeba95 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Fri, 22 Jul 2011 13:23:26 -0500
Subject: [PATCH 177/600] glsl_to_tgsi: make coding style more consistent

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 263 ++++++++++-----------
 1 file changed, 126 insertions(+), 137 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 6e01a44a733..952900a1fb5 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3778,15 +3778,14 @@ static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
  * of labels built here and patch the TGSI code with the actual
  * location of each label.
  */
-static unsigned *get_label( struct st_translate *t,
-                            unsigned branch_target )
+static unsigned *get_label(struct st_translate *t, unsigned branch_target)
 {
    unsigned i;
 
    if (t->labels_count + 1 >= t->labels_size) {
       t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
       t->labels = (struct label *)realloc(t->labels, 
-                                          t->labels_size * sizeof t->labels[0]);
+                                          t->labels_size * sizeof(struct label));
       if (t->labels == NULL) {
          static unsigned dummy;
          t->error = TRUE;
@@ -3805,12 +3804,11 @@ static unsigned *get_label( struct st_translate *t,
  * Update the insn[] array so the next Mesa instruction points to
  * the next TGSI instruction.
  */
-static void set_insn_start( struct st_translate *t,
-                            unsigned start )
+static void set_insn_start(struct st_translate *t, unsigned start)
 {
    if (t->insn_count + 1 >= t->insn_size) {
       t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
-      t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof t->insn[0]);
+      t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0]));
       if (t->insn == NULL) {
          t->error = TRUE;
          return;
@@ -3824,9 +3822,9 @@ static void set_insn_start( struct st_translate *t,
  * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
  */
 static struct ureg_src
-emit_immediate( struct st_translate *t,
-                struct gl_program_parameter_list *params,
-                int index)
+emit_immediate(struct st_translate *t,
+               struct gl_program_parameter_list *params,
+               int index)
 {
    struct ureg_program *ureg = t->ureg;
 
@@ -3861,17 +3859,17 @@ emit_immediate( struct st_translate *t,
  * Map a Mesa dst register to a TGSI ureg_dst register.
  */
 static struct ureg_dst
-dst_register( struct st_translate *t,
-              gl_register_file file,
-              GLuint index )
+dst_register(struct st_translate *t,
+             gl_register_file file,
+             GLuint index)
 {
-   switch( file ) {
+   switch(file) {
    case PROGRAM_UNDEFINED:
       return ureg_dst_undef();
 
    case PROGRAM_TEMPORARY:
       if (ureg_dst_is_undef(t->temps[index]))
-         t->temps[index] = ureg_DECL_temporary( t->ureg );
+         t->temps[index] = ureg_DECL_temporary(t->ureg);
 
       return t->temps[index];
 
@@ -3894,7 +3892,7 @@ dst_register( struct st_translate *t,
       return t->address[index];
 
    default:
-      debug_assert( 0 );
+      assert(!"unknown dst register file");
       return ureg_dst_undef();
    }
 }
@@ -3903,11 +3901,11 @@ dst_register( struct st_translate *t,
  * Map a Mesa src register to a TGSI ureg_src register.
  */
 static struct ureg_src
-src_register( struct st_translate *t,
-              gl_register_file file,
-              GLuint index )
+src_register(struct st_translate *t,
+             gl_register_file file,
+             GLuint index)
 {
-   switch( file ) {
+   switch(file) {
    case PROGRAM_UNDEFINED:
       return ureg_src_undef();
 
@@ -3915,7 +3913,7 @@ src_register( struct st_translate *t,
       assert(index >= 0);
       assert(index < Elements(t->temps));
       if (ureg_dst_is_undef(t->temps[index]))
-         t->temps[index] = ureg_DECL_temporary( t->ureg );
+         t->temps[index] = ureg_DECL_temporary(t->ureg);
       return ureg_src(t->temps[index]);
 
    case PROGRAM_NAMED_PARAM:
@@ -3927,7 +3925,7 @@ src_register( struct st_translate *t,
    case PROGRAM_STATE_VAR:
    case PROGRAM_CONSTANT:       /* ie, immediate */
       if (index < 0)
-         return ureg_DECL_constant( t->ureg, 0 );
+         return ureg_DECL_constant(t->ureg, 0);
       else
          return t->constants[index];
 
@@ -3950,7 +3948,7 @@ src_register( struct st_translate *t,
       return t->systemValues[index];
 
    default:
-      debug_assert( 0 );
+      assert(!"unknown src register file");
       return ureg_src_undef();
    }
 }
@@ -3959,22 +3957,21 @@ src_register( struct st_translate *t,
  * Create a TGSI ureg_dst register from an st_dst_reg.
  */
 static struct ureg_dst
-translate_dst( struct st_translate *t,
-               const st_dst_reg *dst_reg,
-               boolean saturate )
+translate_dst(struct st_translate *t,
+              const st_dst_reg *dst_reg,
+              bool saturate)
 {
-   struct ureg_dst dst = dst_register( t, 
-                                       dst_reg->file,
-                                       dst_reg->index );
+   struct ureg_dst dst = dst_register(t, 
+                                      dst_reg->file,
+                                      dst_reg->index);
 
-   dst = ureg_writemask( dst, 
-                         dst_reg->writemask );
+   dst = ureg_writemask(dst, dst_reg->writemask);
    
    if (saturate)
-      dst = ureg_saturate( dst );
+      dst = ureg_saturate(dst);
 
    if (dst_reg->reladdr != NULL)
-      dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) );
+      dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
 
    return dst;
 }
@@ -3983,16 +3980,15 @@ translate_dst( struct st_translate *t,
  * Create a TGSI ureg_src register from an st_src_reg.
  */
 static struct ureg_src
-translate_src( struct st_translate *t,
-               const st_src_reg *src_reg )
+translate_src(struct st_translate *t, const st_src_reg *src_reg)
 {
-   struct ureg_src src = src_register( t, src_reg->file, src_reg->index );
+   struct ureg_src src = src_register(t, src_reg->file, src_reg->index);
 
-   src = ureg_swizzle( src,
-                       GET_SWZ( src_reg->swizzle, 0 ) & 0x3,
-                       GET_SWZ( src_reg->swizzle, 1 ) & 0x3,
-                       GET_SWZ( src_reg->swizzle, 2 ) & 0x3,
-                       GET_SWZ( src_reg->swizzle, 3 ) & 0x3);
+   src = ureg_swizzle(src,
+                      GET_SWZ(src_reg->swizzle, 0) & 0x3,
+                      GET_SWZ(src_reg->swizzle, 1) & 0x3,
+                      GET_SWZ(src_reg->swizzle, 2) & 0x3,
+                      GET_SWZ(src_reg->swizzle, 3) & 0x3);
 
    if ((src_reg->negate & 0xf) == NEGATE_XYZW)
       src = ureg_negate(src);
@@ -4024,8 +4020,8 @@ translate_src( struct st_translate *t,
 }
 
 static void
-compile_tgsi_instruction(struct st_translate *t, 
-        			     const struct glsl_to_tgsi_instruction *inst)
+compile_tgsi_instruction(struct st_translate *t,
+                         const struct glsl_to_tgsi_instruction *inst)
 {
    struct ureg_program *ureg = t->ureg;
    GLuint i;
@@ -4034,29 +4030,29 @@ compile_tgsi_instruction(struct st_translate *t,
    unsigned num_dst;
    unsigned num_src;
 
-   num_dst = num_inst_dst_regs( inst->op );
-   num_src = num_inst_src_regs( inst->op );
+   num_dst = num_inst_dst_regs(inst->op);
+   num_src = num_inst_src_regs(inst->op);
 
    if (num_dst) 
-      dst[0] = translate_dst( t, 
-                              &inst->dst,
-                              inst->saturate);
+      dst[0] = translate_dst(t, 
+                             &inst->dst,
+                             inst->saturate);
 
    for (i = 0; i < num_src; i++) 
-      src[i] = translate_src( t, &inst->src[i] );
+      src[i] = translate_src(t, &inst->src[i]);
 
-   switch( inst->op ) {
+   switch(inst->op) {
    case TGSI_OPCODE_BGNLOOP:
    case TGSI_OPCODE_CAL:
    case TGSI_OPCODE_ELSE:
    case TGSI_OPCODE_ENDLOOP:
    case TGSI_OPCODE_IF:
-      debug_assert(num_dst == 0);
-      ureg_label_insn( ureg,
-                       inst->op,
-                       src, num_src,
-                       get_label( t, 
-                                  inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0 ));
+      assert(num_dst == 0);
+      ureg_label_insn(ureg,
+                      inst->op,
+                      src, num_src,
+                      get_label(t, 
+                                inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
       return;
 
    case TGSI_OPCODE_TEX:
@@ -4065,27 +4061,23 @@ compile_tgsi_instruction(struct st_translate *t,
    case TGSI_OPCODE_TXL:
    case TGSI_OPCODE_TXP:
       src[num_src++] = t->samplers[inst->sampler];
-      ureg_tex_insn( ureg,
-                     inst->op,
-                     dst, num_dst, 
-                     translate_texture_target( inst->tex_target,
-                                               inst->tex_shadow ),
-                     src, num_src );
+      ureg_tex_insn(ureg,
+                    inst->op,
+                    dst, num_dst, 
+                    translate_texture_target(inst->tex_target, inst->tex_shadow),
+                    src, num_src);
       return;
 
    case TGSI_OPCODE_SCS:
-      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
-      ureg_insn( ureg, 
-                 inst->op, 
-                 dst, num_dst, 
-                 src, num_src );
+      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
+      ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
       break;
 
    default:
-      ureg_insn( ureg, 
-                 inst->op, 
-                 dst, num_dst, 
-                 src, num_src );
+      ureg_insn(ureg,
+                inst->op,
+                dst, num_dst,
+                src, num_src);
       break;
    }
 }
@@ -4095,9 +4087,9 @@ compile_tgsi_instruction(struct st_translate *t,
  * Basically, add (adjX, adjY) to the fragment position.
  */
 static void
-emit_adjusted_wpos( struct st_translate *t,
-                    const struct gl_program *program,
-                    GLfloat adjX, GLfloat adjY)
+emit_adjusted_wpos(struct st_translate *t,
+                   const struct gl_program *program,
+                   float adjX, float adjY)
 {
    struct ureg_program *ureg = t->ureg;
    struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
@@ -4119,9 +4111,9 @@ emit_adjusted_wpos( struct st_translate *t,
  * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
  */
 static void
-emit_wpos_inversion( struct st_translate *t,
-                     const struct gl_program *program,
-                     boolean invert)
+emit_wpos_inversion(struct st_translate *t,
+                    const struct gl_program *program,
+                    bool invert)
 {
    struct ureg_program *ureg = t->ureg;
 
@@ -4140,7 +4132,7 @@ emit_wpos_inversion( struct st_translate *t,
    unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
                                                        wposTransformState);
 
-   struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
+   struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst);
    struct ureg_dst wpos_temp;
    struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
 
@@ -4149,26 +4141,26 @@ emit_wpos_inversion( struct st_translate *t,
    if (wpos_input.File == TGSI_FILE_TEMPORARY)
       wpos_temp = ureg_dst(wpos_input);
    else {
-      wpos_temp = ureg_DECL_temporary( ureg );
-      ureg_MOV( ureg, wpos_temp, wpos_input );
+      wpos_temp = ureg_DECL_temporary(ureg);
+      ureg_MOV(ureg, wpos_temp, wpos_input);
    }
 
    if (invert) {
       /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
        */
-      ureg_MAD( ureg,
-                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
-                wpos_input,
-                ureg_scalar(wpostrans, 0),
-                ureg_scalar(wpostrans, 1));
+      ureg_MAD(ureg,
+               ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
+               wpos_input,
+               ureg_scalar(wpostrans, 0),
+               ureg_scalar(wpostrans, 1));
    } else {
       /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
        */
-      ureg_MAD( ureg,
-                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
-                wpos_input,
-                ureg_scalar(wpostrans, 2),
-                ureg_scalar(wpostrans, 3));
+      ureg_MAD(ureg,
+               ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
+               wpos_input,
+               ureg_scalar(wpostrans, 2),
+               ureg_scalar(wpostrans, 3));
    }
 
    /* Use wpos_temp as position input from here on:
@@ -4312,7 +4304,7 @@ st_translate_program(
    const GLuint outputMapping[],
    const ubyte outputSemanticName[],
    const ubyte outputSemanticIndex[],
-   boolean passthrough_edgeflags )
+   boolean passthrough_edgeflags)
 {
    struct st_translate translate, *t;
    unsigned i;
@@ -4358,27 +4350,24 @@ st_translate_program(
       for (i = 0; i < numOutputs; i++) {
          switch (outputSemanticName[i]) {
          case TGSI_SEMANTIC_POSITION:
-            t->outputs[i] = ureg_DECL_output( ureg,
-                                              TGSI_SEMANTIC_POSITION, /* Z / Depth */
-                                              outputSemanticIndex[i] );
-
-            t->outputs[i] = ureg_writemask( t->outputs[i],
-                                            TGSI_WRITEMASK_Z );
+            t->outputs[i] = ureg_DECL_output(ureg,
+                                             TGSI_SEMANTIC_POSITION, /* Z/Depth */
+                                             outputSemanticIndex[i]);
+            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
             break;
          case TGSI_SEMANTIC_STENCIL:
-            t->outputs[i] = ureg_DECL_output( ureg,
-                                              TGSI_SEMANTIC_STENCIL, /* Stencil */
-                                              outputSemanticIndex[i] );
-            t->outputs[i] = ureg_writemask( t->outputs[i],
-                                            TGSI_WRITEMASK_Y );
+            t->outputs[i] = ureg_DECL_output(ureg,
+                                             TGSI_SEMANTIC_STENCIL, /* Stencil */
+                                             outputSemanticIndex[i]);
+            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
             break;
          case TGSI_SEMANTIC_COLOR:
-            t->outputs[i] = ureg_DECL_output( ureg,
-                                              TGSI_SEMANTIC_COLOR,
-                                              outputSemanticIndex[i] );
+            t->outputs[i] = ureg_DECL_output(ureg,
+                                             TGSI_SEMANTIC_COLOR,
+                                             outputSemanticIndex[i]);
             break;
          default:
-            debug_assert(0);
+            assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
             return PIPE_ERROR_BAD_INPUT;
          }
       }
@@ -4392,9 +4381,9 @@ st_translate_program(
       }
 
       for (i = 0; i < numOutputs; i++) {
-         t->outputs[i] = ureg_DECL_output( ureg,
-                                           outputSemanticName[i],
-                                           outputSemanticIndex[i] );
+         t->outputs[i] = ureg_DECL_output(ureg,
+                                          outputSemanticName[i],
+                                          outputSemanticIndex[i]);
       }
    }
    else {
@@ -4405,9 +4394,9 @@ st_translate_program(
       }
 
       for (i = 0; i < numOutputs; i++) {
-         t->outputs[i] = ureg_DECL_output( ureg,
-                                           outputSemanticName[i],
-                                           outputSemanticIndex[i] );
+         t->outputs[i] = ureg_DECL_output(ureg,
+                                          outputSemanticName[i],
+                                          outputSemanticIndex[i]);
          if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) {
             /* Writing to the point size result register requires special
              * handling to implement clamping.
@@ -4421,8 +4410,8 @@ st_translate_program(
             unsigned pointSizeClampConst =
                _mesa_add_state_reference(proginfo->Parameters,
                                          pointSizeClampState);
-            struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg );
-            t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst );
+            struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg);
+            t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst);
             t->pointSizeResult = t->outputs[i];
             t->pointSizeOutIndex = i;
             t->outputs[i] = psizregtemp;
@@ -4435,8 +4424,8 @@ st_translate_program(
    /* Declare address register.
     */
    if (program->num_address_regs > 0) {
-      debug_assert( program->num_address_regs == 1 );
-      t->address[0] = ureg_DECL_address( ureg );
+      assert(program->num_address_regs == 1);
+      t->address[0] = ureg_DECL_address(ureg);
    }
 
    /* Declare misc input registers
@@ -4461,7 +4450,7 @@ st_translate_program(
        */
       for (i = 0; i < (unsigned)program->next_temp; i++) {
          /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
-         t->temps[i] = ureg_DECL_temporary( t->ureg );
+         t->temps[i] = ureg_DECL_temporary(t->ureg);
       }
    }
 
@@ -4469,7 +4458,7 @@ st_translate_program(
     * so we put all the translated regs in t->constants.
     */
    if (proginfo->Parameters) {
-      t->constants = (struct ureg_src *)CALLOC( proginfo->Parameters->NumParameters * sizeof t->constants[0] );
+      t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0]));
       if (t->constants == NULL) {
          ret = PIPE_ERROR_OUT_OF_MEMORY;
          goto out;
@@ -4482,7 +4471,7 @@ st_translate_program(
          case PROGRAM_STATE_VAR:
          case PROGRAM_NAMED_PARAM:
          case PROGRAM_UNIFORM:
-            t->constants[i] = ureg_DECL_constant( ureg, i );
+            t->constants[i] = ureg_DECL_constant(ureg, i);
             break;
 
          /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
@@ -4493,9 +4482,9 @@ st_translate_program(
           */
          case PROGRAM_CONSTANT:
             if (program->indirect_addr_consts)
-               t->constants[i] = ureg_DECL_constant( ureg, i );
+               t->constants[i] = ureg_DECL_constant(ureg, i);
             else
-               t->constants[i] = emit_immediate( t, proginfo->Parameters, i );
+               t->constants[i] = emit_immediate(t, proginfo->Parameters, i);
             break;
          default:
             break;
@@ -4505,27 +4494,28 @@ st_translate_program(
    
    /* Emit immediate values.
     */
-   t->immediates = (struct ureg_src *)CALLOC( program->immediates->NumParameters * sizeof(struct ureg_src) );
+   t->immediates = (struct ureg_src *)CALLOC(program->immediates->NumParameters * sizeof(struct ureg_src));
    if (t->immediates == NULL) {
       ret = PIPE_ERROR_OUT_OF_MEMORY;
       goto out;
    }
    for (i = 0; i < program->immediates->NumParameters; i++) {
-      t->immediates[i] = emit_immediate( t, program->immediates, i );
+      assert(program->immediates->Parameters[i].Type == PROGRAM_IMMEDIATE);
+      t->immediates[i] = emit_immediate(t, program->immediates, i);
    }
 
    /* texture samplers */
    for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
       if (program->samplers_used & (1 << i)) {
-         t->samplers[i] = ureg_DECL_sampler( ureg, i );
+         t->samplers[i] = ureg_DECL_sampler(ureg, i);
       }
    }
 
    /* Emit each instruction in turn:
     */
    foreach_iter(exec_list_iterator, iter, program->instructions) {
-      set_insn_start( t, ureg_get_instruction_number( ureg ));
-      compile_tgsi_instruction( t, (glsl_to_tgsi_instruction *)iter.get() );
+      set_insn_start(t, ureg_get_instruction_number(ureg));
+      compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get());
 
       if (t->prevInstWrotePointSize && proginfo->Id) {
          /* The previous instruction wrote to the (fake) vertex point size
@@ -4535,14 +4525,14 @@ st_translate_program(
           * Note that we can't do this easily at the end of program due to
           * possible early return.
           */
-         set_insn_start( t, ureg_get_instruction_number( ureg ));
-         ureg_MAX( t->ureg,
-                   ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
-                   ureg_src(t->outputs[t->pointSizeOutIndex]),
-                   ureg_swizzle(t->pointSizeConst, 1,1,1,1));
-         ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
-                   ureg_src(t->outputs[t->pointSizeOutIndex]),
-                   ureg_swizzle(t->pointSizeConst, 2,2,2,2));
+         set_insn_start(t, ureg_get_instruction_number(ureg));
+         ureg_MAX(t->ureg,
+                  ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
+                  ureg_src(t->outputs[t->pointSizeOutIndex]),
+                  ureg_swizzle(t->pointSizeConst, 1,1,1,1));
+         ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
+                  ureg_src(t->outputs[t->pointSizeOutIndex]),
+                  ureg_swizzle(t->pointSizeConst, 2,2,2,2));
       }
       t->prevInstWrotePointSize = GL_FALSE;
    }
@@ -4550,9 +4540,8 @@ st_translate_program(
    /* Fix up all emitted labels:
     */
    for (i = 0; i < t->labels_count; i++) {
-      ureg_fixup_label( ureg,
-                        t->labels[i].token,
-                        t->insn[t->labels[i].branch_target] );
+      ureg_fixup_label(ureg, t->labels[i].token,
+                       t->insn[t->labels[i].branch_target]);
    }
 
 out:
@@ -4582,7 +4571,7 @@ get_mesa_program(struct gl_context *ctx,
    struct gl_program *prog;
    GLenum target;
    const char *target_string;
-   GLboolean progress;
+   bool progress;
    struct gl_shader_compiler_options *options =
          &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
 

From f751730ad003bb19ce85bc4d0abddaf40edde6c1 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Fri, 22 Jul 2011 13:24:42 -0500
Subject: [PATCH 178/600] glsl_to_tgsi: update comments

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 23 +++++++++++-----------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 952900a1fb5..3a69a439822 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -236,7 +236,7 @@ public:
    /**
     * identifier of this function signature used by the program.
     *
-    * At the point that Mesa instructions for function calls are
+    * At the point that TGSI instructions for function calls are
     * generated, we don't know the address of the first instruction of
     * the function body.  So we make the BranchTarget that is called a
     * small integer and rewrite them during set_branchtargets().
@@ -251,10 +251,9 @@ public:
    glsl_to_tgsi_instruction *bgn_inst;
 
    /**
-    * Index of the first instruction of the function body in actual
-    * Mesa IR.
+    * Index of the first instruction of the function body in actual TGSI.
     *
-    * Set after convertion from glsl_to_tgsi_instruction to prog_instruction.
+    * Set after conversion from glsl_to_tgsi_instruction to TGSI.
     */
    int inst;
 
@@ -1672,7 +1671,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
    } else {
       st_src_reg array_base = this->result;
       /* Variable index array dereference.  It eats the "vec4" of the
-       * base of the array and an index that offsets the Mesa register
+       * base of the array and an index that offsets the TGSI register
        * index.
        */
       ir->array_index->accept(this);
@@ -1879,7 +1878,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
       /* Swizzle a small RHS vector into the channels being written.
        *
        * glsl ir treats write_mask as dictating how many channels are
-       * present on the RHS while Mesa IR treats write_mask as just
+       * present on the RHS while TGSI treats write_mask as just
        * showing which channels of the vec4 RHS get written.
        */
       for (int i = 0; i < 4; i++) {
@@ -2202,8 +2201,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
 
    /* Put our coords in a temp.  We'll need to modify them for shadow,
     * projection, or LOD, so the only case we'd use it as is is if
-    * we're doing plain old texturing.  Mesa IR optimization should
-    * handle cleaning up our mess in that case.
+    * we're doing plain old texturing.  The optimization passes on
+    * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
     */
    coord = get_temp(glsl_type::vec4_type);
    coord_dst = st_dst_reg(coord);
@@ -3799,9 +3798,9 @@ static unsigned *get_label(struct st_translate *t, unsigned branch_target)
 }
 
 /**
- * Called prior to emitting the TGSI code for each Mesa instruction.
+ * Called prior to emitting the TGSI code for each instruction.
  * Allocate additional space for instructions if needed.
- * Update the insn[] array so the next Mesa instruction points to
+ * Update the insn[] array so the next glsl_to_tgsi_instruction points to
  * the next TGSI instruction.
  */
 static void set_insn_start(struct st_translate *t, unsigned start)
@@ -3856,7 +3855,7 @@ emit_immediate(struct st_translate *t,
 }
 
 /**
- * Map a Mesa dst register to a TGSI ureg_dst register.
+ * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
  */
 static struct ureg_dst
 dst_register(struct st_translate *t,
@@ -3898,7 +3897,7 @@ dst_register(struct st_translate *t,
 }
 
 /**
- * Map a Mesa src register to a TGSI ureg_src register.
+ * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
  */
 static struct ureg_src
 src_register(struct st_translate *t,

From 3354a5b56398f90fc36ab14b6444aae27b50e859 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Wed, 27 Jul 2011 15:20:19 -0500
Subject: [PATCH 179/600] glsl_to_tgsi: rework immediate tracking to not use
 gl_program_parameter_list

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 135 ++++++++++++++-------
 1 file changed, 88 insertions(+), 47 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 3a69a439822..6039488f26b 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -229,6 +229,20 @@ public:
    ir_variable *var; /* variable that maps to this, if any */
 };
 
+class immediate_storage : public exec_node {
+public:
+   immediate_storage(gl_constant_value *values, int size, int type)
+   {
+      memcpy(this->values, values, size * sizeof(gl_constant_value));
+      this->size = size;
+      this->type = type;
+   }
+   
+   gl_constant_value values[4];
+   int size; /**< Number of components (1-4) */
+   int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
+};
+
 class function_entry : public exec_node {
 public:
    ir_function_signature *sig;
@@ -272,7 +286,6 @@ public:
    struct gl_program *prog;
    struct gl_shader_program *shader_program;
    struct gl_shader_compiler_options *options;
-   struct gl_program_parameter_list *immediates;
 
    int next_temp;
 
@@ -285,6 +298,9 @@ public:
 
    variable_storage *find_variable_storage(ir_variable *var);
 
+   int add_constant(gl_register_file file, gl_constant_value values[4],
+                    int size, int datatype, GLuint *swizzle_out);
+
    function_entry *get_function_signature(ir_function_signature *sig);
 
    st_src_reg get_temp(const glsl_type *type);
@@ -326,6 +342,10 @@ public:
    /** List of variable_storage */
    exec_list variables;
 
+   /** List of immediate_storage */
+   exec_list immediates;
+   int num_immediates;
+
    /** List of function_entry */
    exec_list function_signatures;
    int next_signature_id;
@@ -808,6 +828,42 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
    }
 }
 
+int
+glsl_to_tgsi_visitor::add_constant(gl_register_file file,
+        		     gl_constant_value values[4], int size, int datatype,
+        		     GLuint *swizzle_out)
+{
+   if (file == PROGRAM_CONSTANT) {
+      return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
+                                              size, datatype, swizzle_out);
+   } else {
+      int index = 0;
+      immediate_storage *entry;
+      assert(file == PROGRAM_IMMEDIATE);
+      fprintf(stderr, "adding immediate\n");
+
+      /* Search immediate storage to see if we already have an identical
+       * immediate that we can use instead of adding a duplicate entry.
+       */
+      foreach_iter(exec_list_iterator, iter, this->immediates) {
+         entry = (immediate_storage *)iter.get();
+         
+         if (entry->size == size &&
+             entry->type == datatype &&
+             !memcmp(entry->values, values, size * sizeof(gl_constant_value))) {
+             return index;
+         }
+         index++;
+      }
+      
+      /* Add this immediate to the list. */
+      entry = new(mem_ctx) immediate_storage(values, size, datatype);
+      this->immediates.push_tail(entry);
+      this->num_immediates++;
+      return index;
+   }
+}
+
 struct st_src_reg
 glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
 {
@@ -815,8 +871,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
    union gl_constant_value uval;
 
    uval.f = val;
-   src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1,
-                                                GL_FLOAT, &src.swizzle);
+   src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle);
 
    return src;
 }
@@ -830,8 +885,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
    assert(glsl_version >= 130);
 
    uval.i = val;
-   src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1,
-                                                GL_INT, &src.swizzle);
+   src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);
 
    return src;
 }
@@ -1941,12 +1995,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
    gl_constant_value *values = (gl_constant_value *) stack_vals;
    GLenum gl_type = GL_NONE;
    unsigned int i;
-   gl_register_file file;
-   gl_program_parameter_list *param_list;
    static int in_array = 0;
-
-   file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
-   param_list = in_array ? this->prog->Parameters : this->immediates;
+   gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
 
    /* Unfortunately, 4 floats is all we can get into
     * _mesa_add_typed_unnamed_constant.  So, make a temp to store an
@@ -2009,11 +2059,11 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
          values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
 
          src = st_src_reg(file, -1, ir->type->base_type);
-         src.index = _mesa_add_typed_unnamed_constant(param_list,
-                                                      values,
-                                                      ir->type->vector_elements,
-                                                      GL_FLOAT,
-                                                      &src.swizzle);
+         src.index = add_constant(file,
+                                  values,
+                                  ir->type->vector_elements,
+                                  GL_FLOAT,
+                                  &src.swizzle);
          emit(ir, TGSI_OPCODE_MOV, mat_column, src);
 
          mat_column.index++;
@@ -2062,9 +2112,11 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
    }
 
    this->result = st_src_reg(file, -1, ir->type);
-   this->result.index = _mesa_add_typed_unnamed_constant(param_list,
-        					   values, ir->type->vector_elements, gl_type,
-        					   &this->result.swizzle);
+   this->result.index = add_constant(file,
+                                     values,
+                                     ir->type->vector_elements,
+                                     gl_type,
+                                     &this->result.swizzle);
 }
 
 function_entry *
@@ -2441,17 +2493,16 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
    result.file = PROGRAM_UNDEFINED;
    next_temp = 1;
    next_signature_id = 1;
+   num_immediates = 0;
    current_function = NULL;
    num_address_regs = 0;
    indirect_addr_temps = false;
    indirect_addr_consts = false;
-   immediates = _mesa_new_parameter_list();
    mem_ctx = ralloc_context(NULL);
 }
 
 glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
 {
-   _mesa_free_parameter_list(immediates);
    ralloc_free(mem_ctx);
 }
 
@@ -3538,8 +3589,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
    v->samplers_used = prog->SamplersUsed = original->samplers_used;
    v->indirect_addr_temps = original->indirect_addr_temps;
    v->indirect_addr_consts = original->indirect_addr_consts;
-   _mesa_free_parameter_list(v->immediates);
-   v->immediates = _mesa_clone_parameter_list(original->immediates);
+   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
 
    /*
     * Get initial pixel color from the texture.
@@ -3667,8 +3717,7 @@ get_bitmap_visitor(struct st_fragment_program *fp,
    v->samplers_used = prog->SamplersUsed = original->samplers_used;
    v->indirect_addr_temps = original->indirect_addr_temps;
    v->indirect_addr_consts = original->indirect_addr_consts;
-   _mesa_free_parameter_list(v->immediates);
-   v->immediates = _mesa_clone_parameter_list(original->immediates);
+   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
 
    /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
    coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
@@ -3822,32 +3871,20 @@ static void set_insn_start(struct st_translate *t, unsigned start)
  */
 static struct ureg_src
 emit_immediate(struct st_translate *t,
-               struct gl_program_parameter_list *params,
-               int index)
+               gl_constant_value values[4],
+               int type, int size)
 {
    struct ureg_program *ureg = t->ureg;
 
-   switch(params->Parameters[index].DataType)
+   switch(type)
    {
    case GL_FLOAT:
-   case GL_FLOAT_VEC2:
-   case GL_FLOAT_VEC3:
-   case GL_FLOAT_VEC4:
-      return ureg_DECL_immediate(ureg, (float *)params->ParameterValues[index], 4);
+      return ureg_DECL_immediate(ureg, &values[0].f, size);
    case GL_INT:
-   case GL_INT_VEC2:
-   case GL_INT_VEC3:
-   case GL_INT_VEC4:
-      return ureg_DECL_immediate_int(ureg, (int *)params->ParameterValues[index], 4);
+      return ureg_DECL_immediate_int(ureg, &values[0].i, size);
    case GL_UNSIGNED_INT:
-   case GL_UNSIGNED_INT_VEC2:
-   case GL_UNSIGNED_INT_VEC3:
-   case GL_UNSIGNED_INT_VEC4:
    case GL_BOOL:
-   case GL_BOOL_VEC2:
-   case GL_BOOL_VEC3:
-   case GL_BOOL_VEC4:
-      return ureg_DECL_immediate_uint(ureg, (unsigned *)params->ParameterValues[index], 4);
+      return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
    default:
       assert(!"should not get here - type must be float, int, uint, or bool");
       return ureg_src_undef();
@@ -4483,7 +4520,10 @@ st_translate_program(
             if (program->indirect_addr_consts)
                t->constants[i] = ureg_DECL_constant(ureg, i);
             else
-               t->constants[i] = emit_immediate(t, proginfo->Parameters, i);
+               t->constants[i] = emit_immediate(t,
+                                                proginfo->Parameters->ParameterValues[i],
+                                                proginfo->Parameters->Parameters[i].DataType,
+                                                4);
             break;
          default:
             break;
@@ -4493,14 +4533,15 @@ st_translate_program(
    
    /* Emit immediate values.
     */
-   t->immediates = (struct ureg_src *)CALLOC(program->immediates->NumParameters * sizeof(struct ureg_src));
+   t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src));
    if (t->immediates == NULL) {
       ret = PIPE_ERROR_OUT_OF_MEMORY;
       goto out;
    }
-   for (i = 0; i < program->immediates->NumParameters; i++) {
-      assert(program->immediates->Parameters[i].Type == PROGRAM_IMMEDIATE);
-      t->immediates[i] = emit_immediate(t, program->immediates, i);
+   i = 0;
+   foreach_iter(exec_list_iterator, iter, program->immediates) {
+      immediate_storage *imm = (immediate_storage *)iter.get();
+      t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size);
    }
 
    /* texture samplers */

From 10d31cb307f90a08fafed5c67945ffe53d279940 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Wed, 27 Jul 2011 15:45:16 -0500
Subject: [PATCH 180/600] glsl_to_tgsi: lower all ir_quadop_vector expressions

Unlike Mesa IR, TGSI doesn't have a SWZ opcode.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 6039488f26b..0cbfc943a05 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -4825,7 +4825,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 
          progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
 
-         progress = lower_quadop_vector(ir, true) || progress;
+         progress = lower_quadop_vector(ir, false) || progress;
 
          if (options->EmitNoIfs) {
             progress = lower_discard(ir) || progress;

From 3e7fce9773ec332665326a785b6ed1fcf5bd578e Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Wed, 27 Jul 2011 16:36:10 -0500
Subject: [PATCH 181/600] glsl_to_tgsi: add each relative address to the
 previous

This is a glsl_to_tgsi port of commit d6e1a8f71437.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 0cbfc943a05..f66e240a177 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1741,6 +1741,18 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
               this->result, st_src_reg_for_float(element_size));
       }
 
+      /* If there was already a relative address register involved, add the
+       * new and the old together to get the new offset.
+       */
+      if (src.reladdr != NULL) {
+         st_src_reg accum_reg = get_temp(glsl_type::float_type);
+
+         emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
+              index_reg, *src.reladdr);
+
+         index_reg = accum_reg;
+      }
+
       src.reladdr = ralloc(mem_ctx, st_src_reg);
       memcpy(src.reladdr, &index_reg, sizeof(index_reg));
    }

From 189e9f12c7d3a82d7dd28695935a83e4319bb267 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Wed, 27 Jul 2011 16:39:40 -0500
Subject: [PATCH 182/600] glsl_to_tgsi: copy reladdr in st_src_reg(st_dst_reg)
 constructor

This is a glsl_to_tgsi port of commit f7cd9a858c04.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index f66e240a177..ba4074eecd5 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -174,7 +174,7 @@ st_src_reg::st_src_reg(st_dst_reg reg)
    this->index = reg.index;
    this->swizzle = SWIZZLE_XYZW;
    this->negate = 0;
-   this->reladdr = NULL;
+   this->reladdr = reg.reladdr;
 }
 
 st_dst_reg::st_dst_reg(st_src_reg reg)

From 81b036b4d79423c194596461b098a525af0102c2 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Sat, 30 Jul 2011 16:44:49 -0700
Subject: [PATCH 183/600] i965/gen5+: Fix incorrect miptree layout for
 non-power-of-two cubemaps.

For power-of-two sizes, h0 == mt->height0 since it's already a multiple
of two.  However, for NPOT, they're different; h1 should be computed
based on the original size.

Fixes piglit test "cubemap npot" and oglconform test "textureNPOT".

NOTE: This is a candidate for stable release branches.

Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_tex_layout.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index f462f32b19a..46a417a08ed 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -60,7 +60,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel,
 	   * given in Volume 1 of the BSpec.
 	   */
 	  h0 = ALIGN(mt->height0, align_h);
-	  h1 = ALIGN(minify(h0), align_h);
+	  h1 = ALIGN(minify(mt->height0), align_h);
 	  qpitch = (h0 + h1 + (intel->gen >= 7 ? 12 : 11) * align_h);
           if (mt->compressed)
 	     qpitch /= 4;

From 586e741ac1fa222d041990b265e820f5aa11344d Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Thu, 28 Jul 2011 14:04:09 -0700
Subject: [PATCH 184/600] linker: Make linker_error set LinkStatus to false

Remove the other places that set LinkStatus to false since they all
immediately follow a call to linker_error.  The function linker_error
was previously known as linker_error_printf.  The name was changed
because it may seem surprising that a printf function will set an
error flag.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/glsl/ir_function_detect_recursion.cpp |   4 +-
 src/glsl/link_functions.cpp               |   4 +-
 src/glsl/linker.cpp                       | 180 +++++++++++-----------
 src/glsl/linker.h                         |   2 +-
 4 files changed, 90 insertions(+), 100 deletions(-)

diff --git a/src/glsl/ir_function_detect_recursion.cpp b/src/glsl/ir_function_detect_recursion.cpp
index 44a1cd0b950..a3b461818d3 100644
--- a/src/glsl/ir_function_detect_recursion.cpp
+++ b/src/glsl/ir_function_detect_recursion.cpp
@@ -311,9 +311,7 @@ emit_errors_linked(const void *key, void *data, void *closure)
 				  f->sig->function_name(),
 				  &f->sig->parameters);
 
-   linker_error_printf(prog,
-		       "function `%s' has static recursion.\n",
-		       proto);
+   linker_error(prog, "function `%s' has static recursion.\n", proto);
    ralloc_free(proto);
    prog->LinkStatus = false;
 }
diff --git a/src/glsl/link_functions.cpp b/src/glsl/link_functions.cpp
index 7ba760daa1a..d40f771e342 100644
--- a/src/glsl/link_functions.cpp
+++ b/src/glsl/link_functions.cpp
@@ -91,8 +91,8 @@ public:
       if (sig == NULL) {
 	 /* FINISHME: Log the full signature of unresolved function.
 	  */
-	 linker_error_printf(this->prog, "unresolved reference to function "
-			     "`%s'\n", name);
+	 linker_error(this->prog, "unresolved reference to function `%s'\n",
+		      name);
 	 this->success = false;
 	 return visit_stop;
       }
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index fe570b6cc45..35d893f499e 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -164,7 +164,7 @@ private:
 
 
 void
-linker_error_printf(gl_shader_program *prog, const char *fmt, ...)
+linker_error(gl_shader_program *prog, const char *fmt, ...)
 {
    va_list ap;
 
@@ -172,6 +172,8 @@ linker_error_printf(gl_shader_program *prog, const char *fmt, ...)
    va_start(ap, fmt);
    ralloc_vasprintf_append(&prog->InfoLog, fmt, ap);
    va_end(ap);
+
+   prog->LinkStatus = false;
 }
 
 
@@ -243,8 +245,7 @@ validate_vertex_shader_executable(struct gl_shader_program *prog,
    find_assignment_visitor find("gl_Position");
    find.run(shader->ir);
    if (!find.variable_found()) {
-      linker_error_printf(prog,
-			  "vertex shader does not write to `gl_Position'\n");
+      linker_error(prog, "vertex shader does not write to `gl_Position'\n");
       return false;
    }
 
@@ -271,8 +272,8 @@ validate_fragment_shader_executable(struct gl_shader_program *prog,
    frag_data.run(shader->ir);
 
    if (frag_color.variable_found() && frag_data.variable_found()) {
-      linker_error_printf(prog,  "fragment shader writes to both "
-			  "`gl_FragColor' and `gl_FragData'\n");
+      linker_error(prog,  "fragment shader writes to both "
+		   "`gl_FragColor' and `gl_FragData'\n");
       return false;
    }
 
@@ -357,11 +358,11 @@ cross_validate_globals(struct gl_shader_program *prog,
 		     existing->type = var->type;
 		  }
 	       } else {
-		  linker_error_printf(prog, "%s `%s' declared as type "
-				      "`%s' and type `%s'\n",
-				      mode_string(var),
-				      var->name, var->type->name,
-				      existing->type->name);
+		  linker_error(prog, "%s `%s' declared as type "
+			       "`%s' and type `%s'\n",
+			       mode_string(var),
+			       var->name, var->type->name,
+			       existing->type->name);
 		  return false;
 	       }
 	    }
@@ -369,9 +370,9 @@ cross_validate_globals(struct gl_shader_program *prog,
 	    if (var->explicit_location) {
 	       if (existing->explicit_location
 		   && (var->location != existing->location)) {
-		     linker_error_printf(prog, "explicit locations for %s "
-					 "`%s' have differing values\n",
-					 mode_string(var), var->name);
+		     linker_error(prog, "explicit locations for %s "
+				  "`%s' have differing values\n",
+				  mode_string(var), var->name);
 		     return false;
 	       }
 
@@ -392,12 +393,12 @@ cross_validate_globals(struct gl_shader_program *prog,
            bool layout_declared = var->depth_layout != ir_depth_layout_none;
            bool layout_differs = var->depth_layout != existing->depth_layout;
            if (layout_declared && layout_differs) {
-              linker_error_printf(prog,
+              linker_error(prog,
                  "All redeclarations of gl_FragDepth in all fragment shaders "
                  "in a single program must have the same set of qualifiers.");
            }
            if (var->used && layout_differs) {
-              linker_error_printf(prog,
+              linker_error(prog,
                     "If gl_FragDepth is redeclared with a layout qualifier in"
                     "any fragment shader, it must be redeclared with the same"
                     "layout qualifier in all fragment shaders that have"
@@ -410,9 +411,9 @@ cross_validate_globals(struct gl_shader_program *prog,
 	    if (var->constant_value != NULL) {
 	       if (existing->constant_value != NULL) {
 		  if (!var->constant_value->has_value(existing->constant_value)) {
-		     linker_error_printf(prog, "initializers for %s "
-					 "`%s' have differing values\n",
-					 mode_string(var), var->name);
+		     linker_error(prog, "initializers for %s "
+				  "`%s' have differing values\n",
+				  mode_string(var), var->name);
 		     return false;
 		  }
 	       } else
@@ -433,15 +434,15 @@ cross_validate_globals(struct gl_shader_program *prog,
 	    }
 
 	    if (existing->invariant != var->invariant) {
-	       linker_error_printf(prog, "declarations for %s `%s' have "
-	                           "mismatching invariant qualifiers\n",
-	                           mode_string(var), var->name);
+	       linker_error(prog, "declarations for %s `%s' have "
+			    "mismatching invariant qualifiers\n",
+			    mode_string(var), var->name);
 	       return false;
 	    }
             if (existing->centroid != var->centroid) {
-               linker_error_printf(prog, "declarations for %s `%s' have "
-                                   "mismatching centroid qualifiers\n",
-                                   mode_string(var), var->name);
+               linker_error(prog, "declarations for %s `%s' have "
+			    "mismatching centroid qualifiers\n",
+			    mode_string(var), var->name);
                return false;
             }
 	 } else
@@ -529,13 +530,12 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog,
 	     */
 	    if (!output->type->is_array()
 		|| (strncmp("gl_", output->name, 3) != 0)) {
-	       linker_error_printf(prog,
-				   "%s shader output `%s' declared as "
-				   "type `%s', but %s shader input declared "
-				   "as type `%s'\n",
-				   producer_stage, output->name,
-				   output->type->name,
-				   consumer_stage, input->type->name);
+	       linker_error(prog,
+			    "%s shader output `%s' declared as type `%s', "
+			    "but %s shader input declared as type `%s'\n",
+			    producer_stage, output->name,
+			    output->type->name,
+			    consumer_stage, input->type->name);
 	       return false;
 	    }
 	 }
@@ -543,40 +543,40 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog,
 	 /* Check that all of the qualifiers match between stages.
 	  */
 	 if (input->centroid != output->centroid) {
-	    linker_error_printf(prog,
-				"%s shader output `%s' %s centroid qualifier, "
-				"but %s shader input %s centroid qualifier\n",
-				producer_stage,
-				output->name,
-				(output->centroid) ? "has" : "lacks",
-				consumer_stage,
-				(input->centroid) ? "has" : "lacks");
+	    linker_error(prog,
+			 "%s shader output `%s' %s centroid qualifier, "
+			 "but %s shader input %s centroid qualifier\n",
+			 producer_stage,
+			 output->name,
+			 (output->centroid) ? "has" : "lacks",
+			 consumer_stage,
+			 (input->centroid) ? "has" : "lacks");
 	    return false;
 	 }
 
 	 if (input->invariant != output->invariant) {
-	    linker_error_printf(prog,
-				"%s shader output `%s' %s invariant qualifier, "
-				"but %s shader input %s invariant qualifier\n",
-				producer_stage,
-				output->name,
-				(output->invariant) ? "has" : "lacks",
-				consumer_stage,
-				(input->invariant) ? "has" : "lacks");
+	    linker_error(prog,
+			 "%s shader output `%s' %s invariant qualifier, "
+			 "but %s shader input %s invariant qualifier\n",
+			 producer_stage,
+			 output->name,
+			 (output->invariant) ? "has" : "lacks",
+			 consumer_stage,
+			 (input->invariant) ? "has" : "lacks");
 	    return false;
 	 }
 
 	 if (input->interpolation != output->interpolation) {
-	    linker_error_printf(prog,
-				"%s shader output `%s' specifies %s "
-				"interpolation qualifier, "
-				"but %s shader input specifies %s "
-				"interpolation qualifier\n",
-				producer_stage,
-				output->name,
-				output->interpolation_string(),
-				consumer_stage,
-				input->interpolation_string());
+	    linker_error(prog,
+			 "%s shader output `%s' specifies %s "
+			 "interpolation qualifier, "
+			 "but %s shader input specifies %s "
+			 "interpolation qualifier\n",
+			 producer_stage,
+			 output->name,
+			 output->interpolation_string(),
+			 consumer_stage,
+			 input->interpolation_string());
 	    return false;
 	 }
       }
@@ -823,9 +823,8 @@ link_intrastage_shaders(void *mem_ctx,
 
 	       if ((other_sig != NULL) && other_sig->is_defined
 		   && !other_sig->is_builtin) {
-		  linker_error_printf(prog,
-				      "function `%s' is multiply defined",
-				      f->name);
+		  linker_error(prog, "function `%s' is multiply defined",
+			       f->name);
 		  return NULL;
 	       }
 	    }
@@ -849,9 +848,9 @@ link_intrastage_shaders(void *mem_ctx,
    }
 
    if (main == NULL) {
-      linker_error_printf(prog, "%s shader lacks `main'\n",
-			  (shader_list[0]->Type == GL_VERTEX_SHADER)
-			  ? "vertex" : "fragment");
+      linker_error(prog, "%s shader lacks `main'\n",
+		   (shader_list[0]->Type == GL_VERTEX_SHADER)
+		   ? "vertex" : "fragment");
       return NULL;
    }
 
@@ -1309,10 +1308,10 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
 	  * attribute overlaps any previously allocated bits.
 	  */
 	 if ((~(use_mask << attr) & used_locations) != used_locations) {
-	    linker_error_printf(prog,
-				"insufficient contiguous attribute locations "
-				"available for vertex shader input `%s'",
-				var->name);
+	    linker_error(prog,
+			 "insufficient contiguous attribute locations "
+			 "available for vertex shader input `%s'",
+			 var->name);
 	    return false;
 	 }
 
@@ -1353,11 +1352,10 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
 
 	 if ((var->location >= (int)(max_index + generic_base))
 	     || (var->location < 0)) {
-	    linker_error_printf(prog,
-				"invalid explicit location %d specified for "
-				"`%s'\n",
-				(var->location < 0) ? var->location : attr,
-				var->name);
+	    linker_error(prog,
+			 "invalid explicit location %d specified for `%s'\n",
+			 (var->location < 0) ? var->location : attr,
+			 var->name);
 	    return false;
 	 } else if (var->location >= generic_base) {
 	    used_locations |= (use_mask << attr);
@@ -1406,10 +1404,10 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
 	 const char *const string = (target_index == MESA_SHADER_VERTEX)
 	    ? "vertex shader input" : "fragment shader output";
 
-	 linker_error_printf(prog,
-			     "insufficient contiguous attribute locations "
-			     "available for %s `%s'",
-			     string, to_assign[i].var->name);
+	 linker_error(prog,
+		      "insufficient contiguous attribute locations "
+		      "available for %s `%s'",
+		      string, to_assign[i].var->name);
 	 return false;
       }
 
@@ -1525,9 +1523,8 @@ assign_varying_locations(struct gl_context *ctx,
 	     * "glsl1-varying read but not written" in piglit.
 	     */
 
-	    linker_error_printf(prog, "fragment shader varying %s not written "
-				"by vertex shader\n.", var->name);
-	    prog->LinkStatus = false;
+	    linker_error(prog, "fragment shader varying %s not written "
+			 "by vertex shader\n.", var->name);
 	 }
 
 	 /* An 'in' variable is only really a shader input if its
@@ -1544,17 +1541,17 @@ assign_varying_locations(struct gl_context *ctx,
 
    if (ctx->API == API_OPENGLES2 || prog->Version == 100) {
       if (varying_vectors > ctx->Const.MaxVarying) {
-	 linker_error_printf(prog, "shader uses too many varying vectors "
-			     "(%u > %u)\n",
-			     varying_vectors, ctx->Const.MaxVarying);
+	 linker_error(prog, "shader uses too many varying vectors "
+		      "(%u > %u)\n",
+		      varying_vectors, ctx->Const.MaxVarying);
 	 return false;
       }
    } else {
       const unsigned float_components = varying_vectors * 4;
       if (float_components > ctx->Const.MaxVarying * 4) {
-	 linker_error_printf(prog, "shader uses too many varying components "
-			     "(%u > %u)\n",
-			     float_components, ctx->Const.MaxVarying * 4);
+	 linker_error(prog, "shader uses too many varying components "
+		      "(%u > %u)\n",
+		      float_components, ctx->Const.MaxVarying * 4);
 	 return false;
       }
    }
@@ -1618,8 +1615,8 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
    assert(max_version <= 130);
    if ((max_version >= 130 || min_version == 100)
        && min_version != max_version) {
-      linker_error_printf(prog, "all shaders must use same shading "
-			  "language version\n");
+      linker_error(prog, "all shaders must use same shading "
+		   "language version\n");
       goto done;
    }
 
@@ -1720,12 +1717,10 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
     * FINISHME: at least 16, so hardcode 16 for now.
     */
    if (!assign_attribute_or_color_locations(prog, MESA_SHADER_VERTEX, 16)) {
-      prog->LinkStatus = false;
       goto done;
    }
 
    if (!assign_attribute_or_color_locations(prog, MESA_SHADER_FRAGMENT, ctx->Const.MaxDrawBuffers)) {
-      prog->LinkStatus = false;
       goto done;
    }
 
@@ -1742,7 +1737,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
       if (!assign_varying_locations(ctx, prog,
 				    prog->_LinkedShaders[prev],
 				    prog->_LinkedShaders[i])) {
-	 prog->LinkStatus = false;
 	 goto done;
       }
 
@@ -1774,11 +1768,9 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
     */
    if (ctx->API == API_OPENGLES2 || prog->Version == 100) {
       if (prog->_LinkedShaders[MESA_SHADER_VERTEX] == NULL) {
-	 linker_error_printf(prog, "program lacks a vertex shader\n");
-	 prog->LinkStatus = false;
+	 linker_error(prog, "program lacks a vertex shader\n");
       } else if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) {
-	 linker_error_printf(prog, "program lacks a fragment shader\n");
-	 prog->LinkStatus = false;
+	 linker_error(prog, "program lacks a fragment shader\n");
       }
    }
 
diff --git a/src/glsl/linker.h b/src/glsl/linker.h
index a8ce16a7ec1..dfae073c274 100644
--- a/src/glsl/linker.h
+++ b/src/glsl/linker.h
@@ -26,7 +26,7 @@
 #define GLSL_LINKER_H
 
 extern void
-linker_error_printf(gl_shader_program *prog, const char *fmt, ...);
+linker_error(gl_shader_program *prog, const char *fmt, ...);
 
 extern bool
 link_function_calls(gl_shader_program *prog, gl_shader *main,

From 379a32f42ebca9feeb024633f7774661619fd62e Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Thu, 28 Jul 2011 14:09:06 -0700
Subject: [PATCH 185/600] linker: Make linker_{error,warning} generally
 available

linker_warning is a new function.  It's identical to linker_error
except that it doesn't set LinkStatus=false and it prepends "warning: "
on messages instead of "error: ".

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/glsl/ir_function_detect_recursion.cpp |  1 +
 src/glsl/linker.cpp                       | 13 +++++++++++++
 src/glsl/linker.h                         |  3 ---
 src/glsl/program.h                        |  8 ++++++++
 4 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/src/glsl/ir_function_detect_recursion.cpp b/src/glsl/ir_function_detect_recursion.cpp
index a3b461818d3..8f805bf1ba9 100644
--- a/src/glsl/ir_function_detect_recursion.cpp
+++ b/src/glsl/ir_function_detect_recursion.cpp
@@ -125,6 +125,7 @@
 #include "glsl_parser_extras.h"
 #include "linker.h"
 #include "program/hash_table.h"
+#include "program.h"
 
 struct call_node : public exec_node {
    class function *func;
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 35d893f499e..19eb9b5ff6f 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -177,6 +177,19 @@ linker_error(gl_shader_program *prog, const char *fmt, ...)
 }
 
 
+void
+linker_warning(gl_shader_program *prog, const char *fmt, ...)
+{
+   va_list ap;
+
+   ralloc_strcat(&prog->InfoLog, "error: ");
+   va_start(ap, fmt);
+   ralloc_vasprintf_append(&prog->InfoLog, fmt, ap);
+   va_end(ap);
+
+}
+
+
 void
 invalidate_variable_locations(gl_shader *sh, enum ir_variable_mode mode,
 			      int generic_base)
diff --git a/src/glsl/linker.h b/src/glsl/linker.h
index dfae073c274..769cf68b6ad 100644
--- a/src/glsl/linker.h
+++ b/src/glsl/linker.h
@@ -25,9 +25,6 @@
 #ifndef GLSL_LINKER_H
 #define GLSL_LINKER_H
 
-extern void
-linker_error(gl_shader_program *prog, const char *fmt, ...);
-
 extern bool
 link_function_calls(gl_shader_program *prog, gl_shader *main,
 		    gl_shader **shader_list, unsigned num_shaders);
diff --git a/src/glsl/program.h b/src/glsl/program.h
index db602fa9ec2..437ca1462fa 100644
--- a/src/glsl/program.h
+++ b/src/glsl/program.h
@@ -25,3 +25,11 @@
 
 extern void
 link_shaders(struct gl_context *ctx, struct gl_shader_program *prog);
+
+extern void
+linker_error(gl_shader_program *prog, const char *fmt, ...)
+   PRINTFLIKE(2, 3);
+
+extern void
+linker_warning(gl_shader_program *prog, const char *fmt, ...)
+   PRINTFLIKE(2, 3);

From 89193933cbd322cd08fb54232411a8a9221fcca8 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Thu, 28 Jul 2011 15:10:17 -0700
Subject: [PATCH 186/600] mesa: Ensure that gl_shader_program::InfoLog is never
 NULL

This prevents assertion failures in ralloc_strcat.  The ralloc_free in
_mesa_free_shader_program_data can be omitted because freeing the
gl_shader_program in _mesa_delete_shader_program will take care of
this automatically.

A bunch of this code could use a refactor to use ralloc a bit more
effectively.  A bunch of the things that are allocated with malloc and
owned by the gl_shader_program should be allocated with ralloc (using
the gl_shader_program as the context).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/glsl/main.cpp         |  1 +
 src/mesa/main/shaderobj.c | 11 ++++++-----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp
index 9f85096e1a1..9b8a50738ac 100644
--- a/src/glsl/main.cpp
+++ b/src/glsl/main.cpp
@@ -221,6 +221,7 @@ main(int argc, char **argv)
 
    whole_program = rzalloc (NULL, struct gl_shader_program);
    assert(whole_program != NULL);
+   whole_program->InfoLog = ralloc_strdup(whole_program, "");
 
    for (/* empty */; argc > optind; optind++) {
       whole_program->Shaders =
diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c
index 33d91ad594d..f128648f477 100644
--- a/src/mesa/main/shaderobj.c
+++ b/src/mesa/main/shaderobj.c
@@ -244,6 +244,8 @@ _mesa_init_shader_program(struct gl_context *ctx, struct gl_shader_program *prog
    prog->Geom.InputType = GL_TRIANGLES;
    prog->Geom.OutputType = GL_TRIANGLE_STRIP;
 #endif
+
+   prog->InfoLog = ralloc_strdup(prog, "");
 }
 
 /**
@@ -283,6 +285,10 @@ _mesa_clear_shader_program_data(struct gl_context *ctx,
       _mesa_free_parameter_list(shProg->Varying);
       shProg->Varying = NULL;
    }
+
+   assert(shProg->InfoLog != NULL);
+   ralloc_free(shProg->InfoLog);
+   shProg->InfoLog = ralloc_strdup(shProg, "");
 }
 
 
@@ -317,11 +323,6 @@ _mesa_free_shader_program_data(struct gl_context *ctx,
       shProg->Shaders = NULL;
    }
 
-   if (shProg->InfoLog) {
-      ralloc_free(shProg->InfoLog);
-      shProg->InfoLog = NULL;
-   }
-
    /* Transform feedback varying vars */
    for (i = 0; i < shProg->TransformFeedback.NumVarying; i++) {
       free(shProg->TransformFeedback.VaryingNames[i]);

From 8aadd89d07d750aadd10989fa9c81f8a2fdd98e2 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 25 Jul 2011 15:55:59 -0700
Subject: [PATCH 187/600] ir_to_mesa: Use Add linker_error instead of fail_link

The functions were almost identical.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/ir_to_mesa.cpp | 53 ++++++++++++++-------------------
 1 file changed, 22 insertions(+), 31 deletions(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 8b4a535b75f..a0188128e2a 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -331,20 +331,6 @@ dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP);
 
 dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X);
 
-static void
-fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
-
-static void
-fail_link(struct gl_shader_program *prog, const char *fmt, ...)
-{
-   va_list args;
-   va_start(args, fmt);
-   ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
-   va_end(args);
-
-   prog->LinkStatus = GL_FALSE;
-}
-
 static int
 swizzle_for_size(int size)
 {
@@ -789,10 +775,11 @@ ir_to_mesa_visitor::visit(ir_variable *ir)
 
       if (storage->file == PROGRAM_TEMPORARY &&
 	  dst.index != storage->index + (int) ir->num_state_slots) {
-	 fail_link(this->shader_program,
-		   "failed to load builtin uniform `%s'  (%d/%d regs loaded)\n",
-		   ir->name, dst.index - storage->index,
-		   type_size(ir->type));
+	 linker_error(this->shader_program,
+		      "failed to load builtin uniform `%s' "
+		      "(%d/%d regs loaded)\n",
+		      ir->name, dst.index - storage->index,
+		      type_size(ir->type));
       }
    }
 }
@@ -2413,29 +2400,32 @@ check_resources(const struct gl_context *ctx,
    case GL_VERTEX_PROGRAM_ARB:
       if (_mesa_bitcount(prog->SamplersUsed) >
           ctx->Const.MaxVertexTextureImageUnits) {
-         fail_link(shader_program, "Too many vertex shader texture samplers");
+         linker_error(shader_program,
+		      "Too many vertex shader texture samplers");
       }
       if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
-         fail_link(shader_program, "Too many vertex shader constants");
+         linker_error(shader_program, "Too many vertex shader constants");
       }
       break;
    case MESA_GEOMETRY_PROGRAM:
       if (_mesa_bitcount(prog->SamplersUsed) >
           ctx->Const.MaxGeometryTextureImageUnits) {
-         fail_link(shader_program, "Too many geometry shader texture samplers");
+         linker_error(shader_program,
+		      "Too many geometry shader texture samplers");
       }
       if (prog->Parameters->NumParameters >
           MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) {
-         fail_link(shader_program, "Too many geometry shader constants");
+         linker_error(shader_program, "Too many geometry shader constants");
       }
       break;
    case GL_FRAGMENT_PROGRAM_ARB:
       if (_mesa_bitcount(prog->SamplersUsed) >
           ctx->Const.MaxTextureImageUnits) {
-         fail_link(shader_program, "Too many fragment shader texture samplers");
+         linker_error(shader_program,
+		      "Too many fragment shader texture samplers");
       }
       if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
-         fail_link(shader_program, "Too many fragment shader constants");
+         linker_error(shader_program, "Too many fragment shader constants");
       }
       break;
    default:
@@ -2550,9 +2540,10 @@ add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
 	  * from _mesa_add_uniform) has to match what the linker chose.
 	  */
 	 if (index != parameter_index) {
-	    fail_link(shader_program, "Allocation of uniform `%s' to target "
-		      "failed (%d vs %d)\n",
-		      uniform->Name, index, parameter_index);
+	    linker_error(shader_program,
+			 "Allocation of uniform `%s' to target failed "
+			 "(%d vs %d)\n",
+			 uniform->Name, index, parameter_index);
 	 }
       }
    }
@@ -2585,8 +2576,8 @@ set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
    int loc = _mesa_get_uniform_location(ctx, shader_program, name);
 
    if (loc == -1) {
-      fail_link(shader_program,
-		"Couldn't find uniform for initializer %s\n", name);
+      linker_error(shader_program,
+		   "Couldn't find uniform for initializer %s\n", name);
       return;
    }
 
@@ -2987,7 +2978,7 @@ get_mesa_program(struct gl_context *ctx,
             prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;
 
       if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) {
-	 fail_link(shader_program, "Couldn't flatten if statement\n");
+	 linker_error(shader_program, "Couldn't flatten if statement\n");
       }
 
       switch (mesa_inst->Opcode) {
@@ -3258,7 +3249,7 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 
    for (i = 0; i < prog->NumShaders; i++) {
       if (!prog->Shaders[i]->CompileStatus) {
-	 fail_link(prog, "linking with uncompiled shader");
+	 linker_error(prog, "linking with uncompiled shader");
 	 prog->LinkStatus = GL_FALSE;
       }
    }

From 322c3bf9dc4c6edbf5a8793475ce1307e1c0186b Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 25 Jul 2011 15:58:07 -0700
Subject: [PATCH 188/600] ir_to_mesa: Emit warnings instead of errors for IR
 that can't be lowered

Rely on the driver to do the right thing.  This probably means falling
back to software.  Page 88 of the OpenGL 2.1 spec specifically says:

    "A shader should not fail to compile, and a program object should
    not fail to link due to lack of instruction space or lack of
    temporary variables. Implementations should ensure that all valid
    shaders and program objects may be successfully compiled, linked
    and executed."

There is no provision for saying "No" to a valid shader that is
difficult for the hardware to handle, so stop doing that.

On i915 this causes a large number of piglit tests to change from FAIL
to WARN.  The warning is because the driver still emits messages to
stderr like "i915_program_error: Unsupported opcode: BGNLOOP".

It also fixes ES2 conformance CorrectFull_frag and CorrectParse1_frag
on i915 (and probably other hardware that can't handle loops).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/ir_to_mesa.cpp | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index a0188128e2a..382cda0c703 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2977,11 +2977,31 @@ get_mesa_program(struct gl_context *ctx,
          if (mesa_inst->SrcReg[src].RelAddr)
             prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;
 
-      if (options->EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) {
-	 linker_error(shader_program, "Couldn't flatten if statement\n");
-      }
-
       switch (mesa_inst->Opcode) {
+      case OPCODE_IF:
+	 if (options->EmitNoIfs) {
+	    linker_warning(shader_program,
+			   "Couldn't flatten if-statement.  "
+			   "This will likely result in software "
+			   "rasterization.\n");
+	 }
+	 break;
+      case OPCODE_BGNLOOP:
+	 if (options->EmitNoLoops) {
+	    linker_warning(shader_program,
+			   "Couldn't unroll loop.  "
+			   "This will likely result in software "
+			   "rasterization.\n");
+	 }
+	 break;
+      case OPCODE_CONT:
+	 if (options->EmitNoCont) {
+	    linker_warning(shader_program,
+			   "Couldn't lower continue-statement.  "
+			   "This will likely result in software "
+			   "rasterization.\n");
+	 }
+	 break;
       case OPCODE_BGNSUB:
 	 inst->function->inst = i;
 	 mesa_inst->Comment = strdup(inst->function->sig->function_name());

From 3bb2f0dde1cd813a0b5e0b45be376f4d6606aeb8 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 25 Jul 2011 16:41:39 -0700
Subject: [PATCH 189/600] i915: Fail without crashing if a Mesa IR program uses
 too many registers

This can only happen in GLSL shaders because assembly shaders that use
too many temps are rejected by core Mesa.  It is easiest to make this
happen with shaders that contain flow-control that could not be lowered.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i915/i915_fragprog.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c
index 6e1d7092237..32050cebf33 100644
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -303,7 +303,7 @@ do {									\
 /* 
  * TODO: consider moving this into core 
  */
-static void calc_live_regs( struct i915_fragment_program *p )
+static bool calc_live_regs( struct i915_fragment_program *p )
 {
     const struct gl_fragment_program *program = &p->FragProg;
     GLuint regsUsed = 0xffff0000;
@@ -317,6 +317,9 @@ static void calc_live_regs( struct i915_fragment_program *p )
 
         /* Register is written to: unmark as live for this and preceeding ops */ 
         if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+	    if (inst->DstReg.Index > 16)
+	       return false;
+
             live_components[inst->DstReg.Index] &= ~inst->DstReg.WriteMask;
             if (live_components[inst->DstReg.Index] == 0)
                 regsUsed &= ~(1 << inst->DstReg.Index);
@@ -327,6 +330,9 @@ static void calc_live_regs( struct i915_fragment_program *p )
             if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) {
                 unsigned c;
 
+		if (inst->SrcReg[a].Index > 16)
+		   return false;
+
                 regsUsed |= 1 << inst->SrcReg[a].Index;
 
                 for (c = 0; c < 4; c++) {
@@ -340,6 +346,8 @@ static void calc_live_regs( struct i915_fragment_program *p )
 
         p->usedRegs[i] = regsUsed;
     }
+
+    return true;
 }
 
 static GLuint get_live_regs( struct i915_fragment_program *p, 
@@ -394,7 +402,10 @@ upload_program(struct i915_fragment_program *p)
 
    /* Not always needed:
     */
-   calc_live_regs(p);
+   if (!calc_live_regs(p)) {
+      i915_program_error(p, "Could not allocate registers");
+      return;
+   }
 
    while (1) {
       GLuint src0, src1, src2, flags;

From 0290a018a50bd4a3180af3233f145f4de7b63706 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Thu, 10 Feb 2011 13:20:26 -0800
Subject: [PATCH 190/600] i915: Only emit program errors when INTEL_DEBUG=wm or
 INTEL_DEBUG=fallbacks

This makes piglit a lot more happy.  The errors are logged when
INTEL_DEBUG=fallbacks because the application is about to hit a big
software fallback.  We frequently ask people to run applications that
are hitting software fallbacks with INTEL_DEBUG=fallbacks so the we
can help them debug the reason for the software fallback.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i915/i915_program.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i915/i915_program.c b/src/mesa/drivers/dri/i915/i915_program.c
index ca1949b223e..0a600d30bef 100644
--- a/src/mesa/drivers/dri/i915/i915_program.c
+++ b/src/mesa/drivers/dri/i915/i915_program.c
@@ -442,14 +442,16 @@ i915_emit_param4fv(struct i915_fragment_program * p, const GLfloat * values)
 void
 i915_program_error(struct i915_fragment_program *p, const char *fmt, ...)
 {
-   va_list args;
+   if (unlikely((INTEL_DEBUG & (DEBUG_WM | DEBUG_FALLBACKS)) != 0)) {
+      va_list args;
 
-   fprintf(stderr, "i915_program_error: ");
-   va_start(args, fmt);
-   vfprintf(stderr, fmt, args);
-   va_end(args);
+      fprintf(stderr, "i915_program_error: ");
+      va_start(args, fmt);
+      vfprintf(stderr, fmt, args);
+      va_end(args);
 
-   fprintf(stderr, "\n");
+      fprintf(stderr, "\n");
+   }
    p->error = 1;
 }
 

From be7407b75b12c70e1925c10117937ae2b9e6711f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Tue, 2 Aug 2011 01:04:58 +0200
Subject: [PATCH 191/600] gallium/util: add functions for manipulating swizzles

Some of those have been in drivers already.
---
 src/gallium/auxiliary/util/u_format.c         | 50 +++++++++++++++++++
 src/gallium/auxiliary/util/u_format.h         | 19 +++++++
 src/gallium/drivers/r300/r300_fs.c            |  5 +-
 src/gallium/drivers/r300/r300_state_derived.c | 18 +------
 src/gallium/drivers/r300/r300_texture.c       | 14 +-----
 src/gallium/drivers/r300/r300_texture.h       |  4 --
 src/gallium/drivers/r600/r600_texture.c       |  6 +--
 7 files changed, 75 insertions(+), 41 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c
index 9cbdd0a5b99..3a8aeab5fed 100644
--- a/src/gallium/auxiliary/util/u_format.c
+++ b/src/gallium/auxiliary/util/u_format.c
@@ -390,3 +390,53 @@ util_format_translate(enum pipe_format dst_format,
       FREE(tmp_row);
    }
 }
+
+void util_format_compose_swizzles(const unsigned char swz1[4],
+                                  const unsigned char swz2[4],
+                                  unsigned char dst[4])
+{
+   unsigned i;
+
+   for (i = 0; i < 4; i++) {
+      dst[i] = swz2[i] <= UTIL_FORMAT_SWIZZLE_W ?
+               swz1[swz2[i]] : swz2[i];
+   }
+}
+
+void util_format_swizzle_4f(float *dst, const float *src,
+                            const unsigned char swz[4])
+{
+   unsigned i;
+
+   for (i = 0; i < 4; i++) {
+      if (swz[i] < UTIL_FORMAT_SWIZZLE_W)
+         dst[i] = src[swz[i]];
+      else if (swz[i] == UTIL_FORMAT_SWIZZLE_0)
+         dst[i] = 0;
+      else if (swz[i] == UTIL_FORMAT_SWIZZLE_1)
+         dst[i] = 1;
+   }
+}
+
+void util_format_unswizzle_4f(float *dst, const float *src,
+                              const unsigned char swz[4])
+{
+   unsigned i;
+
+   for (i = 0; i < 4; i++) {
+      switch (swz[i]) {
+      case UTIL_FORMAT_SWIZZLE_X:
+         dst[0] = src[i];
+         break;
+      case UTIL_FORMAT_SWIZZLE_Y:
+         dst[1] = src[i];
+         break;
+      case UTIL_FORMAT_SWIZZLE_Z:
+         dst[2] = src[i];
+         break;
+      case UTIL_FORMAT_SWIZZLE_W:
+         dst[3] = src[i];
+         break;
+      }
+   }
+}
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index bb3ed72e932..566fa79e781 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -815,6 +815,25 @@ util_format_translate(enum pipe_format dst_format,
                       unsigned src_x, unsigned src_y,
                       unsigned width, unsigned height);
 
+/*
+ * Swizzle operations.
+ */
+
+/* Compose two sets of swizzles.
+ * If V is a 4D vector and the function parameters represent functions that
+ * swizzle vector components, this holds:
+ *     swz2(swz1(V)) = dst(V)
+ */
+void util_format_compose_swizzles(const unsigned char swz1[4],
+                                  const unsigned char swz2[4],
+                                  unsigned char dst[4]);
+
+void util_format_swizzle_4f(float *dst, const float *src,
+                            const unsigned char swz[4]);
+
+void util_format_unswizzle_4f(float *dst, const float *src,
+                              const unsigned char swz[4]);
+
 #ifdef __cplusplus
 } // extern "C" {
 #endif
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index a9fd3ad40dd..6f21125f70a 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -180,9 +180,10 @@ static void get_external_state(
             v->base.format == PIPE_FORMAT_LATC1_SNORM) {
             unsigned char swizzle[4];
 
-            util_format_combine_swizzles(swizzle,
+            util_format_compose_swizzles(
                             util_format_description(v->base.format)->swizzle,
-                            v->swizzle);
+                            v->swizzle,
+                            swizzle);
 
             state->unit[i].texture_swizzle =
                     RC_MAKE_SWIZZLE(swizzle[0], swizzle[1],
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index f63114e7eb7..45c11fce1fe 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -605,7 +605,6 @@ static uint32_t r300_get_border_color(enum pipe_format format,
 {
     const struct util_format_description *desc;
     float border_swizzled[4] = {0};
-    unsigned i;
     union util_color uc = {0};
 
     desc = util_format_description(format);
@@ -629,22 +628,7 @@ static uint32_t r300_get_border_color(enum pipe_format format,
     }
 
     /* Apply inverse swizzle of the format. */
-    for (i = 0; i < 4; i++) {
-        switch (desc->swizzle[i]) {
-        case UTIL_FORMAT_SWIZZLE_X:
-            border_swizzled[0] = border[i];
-            break;
-        case UTIL_FORMAT_SWIZZLE_Y:
-            border_swizzled[1] = border[i];
-            break;
-        case UTIL_FORMAT_SWIZZLE_Z:
-            border_swizzled[2] = border[i];
-            break;
-        case UTIL_FORMAT_SWIZZLE_W:
-            border_swizzled[3] = border[i];
-            break;
-        }
-    }
+    util_format_unswizzle_4f(border_swizzled, border, desc->swizzle);
 
     /* Compressed formats. */
     if (util_format_is_compressed(format)) {
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 08fccbe51c5..fc84004fb97 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -38,18 +38,6 @@
 
 #include "pipe/p_screen.h"
 
-void util_format_combine_swizzles(unsigned char *dst,
-                                  const unsigned char *swz1,
-                                  const unsigned char *swz2)
-{
-    unsigned i;
-
-    for (i = 0; i < 4; i++) {
-        dst[i] = swz2[i] <= UTIL_FORMAT_SWIZZLE_W ?
-                 swz1[swz2[i]] : swz2[i];
-    }
-}
-
 unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
                                    const unsigned char *swizzle_view,
                                    boolean dxtc_swizzle)
@@ -72,7 +60,7 @@ unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
 
     if (swizzle_view) {
         /* Combine two sets of swizzles. */
-        util_format_combine_swizzles(swizzle, swizzle_format, swizzle_view);
+        util_format_compose_swizzles(swizzle_format, swizzle_view, swizzle);
     } else {
         memcpy(swizzle, swizzle_format, 4);
     }
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
index 4586bb2e4dc..158a387478f 100644
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -35,10 +35,6 @@ struct r300_texture_desc;
 struct r300_resource;
 struct r300_screen;
 
-void util_format_combine_swizzles(unsigned char *dst,
-                                  const unsigned char *swz1,
-                                  const unsigned char *swz2);
-
 unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
                                    const unsigned char *swizzle_view,
                                    boolean dxtc_swizzle);
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index e9e8b277243..927eb5dafc9 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -754,11 +754,7 @@ static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
 	};
 
 	if (swizzle_view) {
-		/* Combine two sets of swizzles. */
-		for (i = 0; i < 4; i++) {
-			swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ?
-				swizzle_format[swizzle_view[i]] : swizzle_view[i];
-		}
+		util_format_compose_swizzles(swizzle_format, swizzle_view, swizzle);
 	} else {
 		memcpy(swizzle, swizzle_format, 4);
 	}

From f6df430a85141f6a384c18079fb5b2ad848dac0d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Fri, 22 Jul 2011 18:45:30 +0200
Subject: [PATCH 192/600] r600g: remove unused code

---
 src/gallium/drivers/r600/r600.h               |   1 -
 src/gallium/winsys/r600/drm/Makefile          |   1 -
 src/gallium/winsys/r600/drm/SConscript        |   1 -
 src/gallium/winsys/r600/drm/bof.c             | 477 ------------------
 src/gallium/winsys/r600/drm/bof.h             |  90 ----
 .../winsys/r600/drm/evergreen_hw_context.c    |   1 -
 src/gallium/winsys/r600/drm/r600_hw_context.c |  83 ---
 7 files changed, 654 deletions(-)
 delete mode 100644 src/gallium/winsys/r600/drm/bof.c
 delete mode 100644 src/gallium/winsys/r600/drm/bof.h

diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 61adc7ed988..d2b03418ede 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -291,7 +291,6 @@ void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r6
 void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
 void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
 void r600_context_flush(struct r600_context *ctx);
-void r600_context_dump_bof(struct r600_context *ctx, const char *file);
 void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw);
 
 struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type);
diff --git a/src/gallium/winsys/r600/drm/Makefile b/src/gallium/winsys/r600/drm/Makefile
index fb7b09b3a0d..1d0de31c65a 100644
--- a/src/gallium/winsys/r600/drm/Makefile
+++ b/src/gallium/winsys/r600/drm/Makefile
@@ -5,7 +5,6 @@ include $(TOP)/configs/current
 LIBNAME = r600winsys
 
 C_SOURCES = \
-	bof.c \
 	evergreen_hw_context.c \
 	radeon_bo.c \
 	radeon_pciid.c \
diff --git a/src/gallium/winsys/r600/drm/SConscript b/src/gallium/winsys/r600/drm/SConscript
index f55bb265226..efcedc6bff9 100644
--- a/src/gallium/winsys/r600/drm/SConscript
+++ b/src/gallium/winsys/r600/drm/SConscript
@@ -3,7 +3,6 @@ Import('*')
 env = env.Clone()
 
 r600_sources = [
-    'bof.c',
     'evergreen_hw_context.c',
     'radeon_bo.c',
     'radeon_pciid.c',
diff --git a/src/gallium/winsys/r600/drm/bof.c b/src/gallium/winsys/r600/drm/bof.c
deleted file mode 100644
index 5c923ad38d6..00000000000
--- a/src/gallium/winsys/r600/drm/bof.c
+++ /dev/null
@@ -1,477 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *      Jerome Glisse
- */
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-#include "bof.h"
-
-/*
- * helpers
- */
-static int bof_entry_grow(bof_t *bof)
-{
-	bof_t **array;
-
-	if (bof->array_size < bof->nentry)
-		return 0;
-	array = realloc(bof->array, (bof->nentry + 16) * sizeof(void*));
-	if (array == NULL)
-		return -ENOMEM;
-	bof->array = array;
-	bof->nentry += 16;
-	return 0;
-}
-
-/*
- * object
- */
-bof_t *bof_object(void)
-{
-	bof_t *object;
-
-	object = calloc(1, sizeof(bof_t));
-	if (object == NULL)
-		return NULL;
-	object->refcount = 1;
-	object->type = BOF_TYPE_OBJECT;
-	object->size = 12;
-	return object;
-}
-
-bof_t *bof_object_get(bof_t *object, const char *keyname)
-{
-	unsigned i;
-
-	for (i = 0; i < object->array_size; i += 2) {
-		if (!strcmp(object->array[i]->value, keyname)) {
-			return object->array[i + 1];
-		}
-	}
-	return NULL;
-}
-
-int bof_object_set(bof_t *object, const char *keyname, bof_t *value)
-{
-	bof_t *key;
-	int r;
-
-	if (object->type != BOF_TYPE_OBJECT)
-		return -EINVAL;
-	r = bof_entry_grow(object);
-	if (r)
-		return r;
-	key = bof_string(keyname);
-	if (key == NULL)
-		return -ENOMEM;
-	object->array[object->array_size++] = key;
-	object->array[object->array_size++] = value;
-	object->size += value->size;
-	object->size += key->size;
-	bof_incref(value);
-	return 0;
-}
-
-/*
- * array
- */
-bof_t *bof_array(void)
-{
-	bof_t *array = bof_object();
-
-	if (array == NULL)
-		return NULL;
-	array->type = BOF_TYPE_ARRAY;
-	array->size = 12;
-	return array;
-}
-
-int bof_array_append(bof_t *array, bof_t *value)
-{
-	int r;
-	if (array->type != BOF_TYPE_ARRAY)
-		return -EINVAL;
-	r = bof_entry_grow(array);
-	if (r)
-		return r;
-	array->array[array->array_size++] = value;
-	array->size += value->size;
-	bof_incref(value);
-	return 0;
-}
-
-bof_t *bof_array_get(bof_t *bof, unsigned i)
-{
-	if (!bof_is_array(bof) || i >= bof->array_size)
-		return NULL;
-	return bof->array[i];
-}
-
-unsigned bof_array_size(bof_t *bof)
-{
-	if (!bof_is_array(bof))
-		return 0;
-	return bof->array_size;
-}
-
-/*
- * blob
- */
-bof_t *bof_blob(unsigned size, void *value)
-{
-	bof_t *blob = bof_object();
-
-	if (blob == NULL)
-		return NULL;
-	blob->type = BOF_TYPE_BLOB;
-	blob->value = calloc(1, size);
-	if (blob->value == NULL) {
-		bof_decref(blob);
-		return NULL;
-	}
-	blob->size = size;
-	memcpy(blob->value, value, size);
-	blob->size += 12;
-	return blob;
-}
-
-unsigned bof_blob_size(bof_t *bof)
-{
-	if (!bof_is_blob(bof))
-		return 0;
-	return bof->size - 12;
-}
-
-void *bof_blob_value(bof_t *bof)
-{
-	if (!bof_is_blob(bof))
-		return NULL;
-	return bof->value;
-}
-
-/*
- * string
- */
-bof_t *bof_string(const char *value)
-{
-	bof_t *string = bof_object();
-
-	if (string == NULL)
-		return NULL;
-	string->type = BOF_TYPE_STRING;
-	string->size = strlen(value) + 1;
-	string->value = calloc(1, string->size);
-	if (string->value == NULL) {
-		bof_decref(string);
-		return NULL;
-	}
-	strcpy(string->value, value);
-	string->size += 12;
-	return string;
-}
-
-/*
- *  int32
- */
-bof_t *bof_int32(int32_t value)
-{
-	bof_t *int32 = bof_object();
-
-	if (int32 == NULL)
-		return NULL;
-	int32->type = BOF_TYPE_INT32;
-	int32->size = 4;
-	int32->value = calloc(1, int32->size);
-	if (int32->value == NULL) {
-		bof_decref(int32);
-		return NULL;
-	}
-	memcpy(int32->value, &value, 4);
-	int32->size += 12;
-	return int32;
-}
-
-int32_t bof_int32_value(bof_t *bof)
-{
-	return *((uint32_t*)bof->value);
-}
-
-/*
- *  common
- */
-static void bof_indent(int level)
-{
-	int i;
-
-	for (i = 0; i < level; i++)
-		fprintf(stderr, " ");
-}
-
-static void bof_print_bof(bof_t *bof, int level, int entry)
-{
-	bof_indent(level);
-	if (bof == NULL) {
-		fprintf(stderr, "--NULL-- for entry %d\n", entry);
-		return;
-	}
-	switch (bof->type) {
-	case BOF_TYPE_STRING:
-		fprintf(stderr, "%p string [%s %d]\n", bof, (char*)bof->value, bof->size);
-		break;
-	case BOF_TYPE_INT32:
-		fprintf(stderr, "%p int32 [%d %d]\n", bof, *(int*)bof->value, bof->size);
-		break;
-	case BOF_TYPE_BLOB:
-		fprintf(stderr, "%p blob [%d]\n", bof, bof->size);
-		break;
-	case BOF_TYPE_NULL:
-		fprintf(stderr, "%p null [%d]\n", bof, bof->size);
-		break;
-	case BOF_TYPE_OBJECT:
-		fprintf(stderr, "%p object [%d %d]\n", bof, bof->array_size / 2, bof->size);
-		break;
-	case BOF_TYPE_ARRAY:
-		fprintf(stderr, "%p array [%d %d]\n", bof, bof->array_size, bof->size);
-		break;
-	default:
-		fprintf(stderr, "%p unknown [%d]\n", bof, bof->type);
-		return;
-	}
-}
-
-static void bof_print_rec(bof_t *bof, int level, int entry)
-{
-	unsigned i;
-
-	bof_print_bof(bof, level, entry);
-	for (i = 0; i < bof->array_size; i++) {
-		bof_print_rec(bof->array[i], level + 2, i);
-	}
-}
-
-void bof_print(bof_t *bof)
-{
-	bof_print_rec(bof, 0, 0);
-}
-
-static int bof_read(bof_t *root, FILE *file, long end, int level)
-{
-	bof_t *bof = NULL;
-	int r;
-
-	if (ftell(file) >= end) {
-		return 0;
-	}
-	r = bof_entry_grow(root);
-	if (r)
-		return r;
-	bof = bof_object();
-	if (bof == NULL)
-		return -ENOMEM;
-	bof->offset = ftell(file);
-	r = fread(&bof->type, 4, 1, file);
-	if (r != 1)
-		goto out_err;
-	r = fread(&bof->size, 4, 1, file);
-	if (r != 1)
-		goto out_err;
-	r = fread(&bof->array_size, 4, 1, file);
-	if (r != 1)
-		goto out_err;
-	switch (bof->type) {
-	case BOF_TYPE_STRING:
-	case BOF_TYPE_INT32:
-	case BOF_TYPE_BLOB:
-		bof->value = calloc(1, bof->size - 12);
-		if (bof->value == NULL) {
-			goto out_err;
-		}
-		r = fread(bof->value, bof->size - 12, 1, file);
-		if (r != 1) {
-			fprintf(stderr, "error reading %d\n", bof->size - 12);
-			goto out_err;
-		}
-		break;
-	case BOF_TYPE_NULL:
-		return 0;
-	case BOF_TYPE_OBJECT:
-	case BOF_TYPE_ARRAY:
-		r = bof_read(bof, file, bof->offset + bof->size, level + 2);
-		if (r)
-			goto out_err;
-		break;
-	default:
-		fprintf(stderr, "invalid type %d\n", bof->type);
-		goto out_err;
-	}
-	root->array[root->centry++] = bof;
-	return bof_read(root, file, end, level);
-out_err:
-	bof_decref(bof);
-	return -EINVAL;
-}
-
-bof_t *bof_load_file(const char *filename)
-{
-	bof_t *root = bof_object();
-	int r;
-
-	if (root == NULL) {
-		fprintf(stderr, "%s failed to create root object\n", __func__);
-		return NULL;
-	}
-	root->file = fopen(filename, "r");
-	if (root->file == NULL)
-		goto out_err;
-	r = fseek(root->file, 0L, SEEK_SET);
-	if (r) {
-		fprintf(stderr, "%s failed to seek into file %s\n", __func__, filename);
-		goto out_err;
-	}
-	root->offset = ftell(root->file);
-	r = fread(&root->type, 4, 1, root->file);
-	if (r != 1)
-		goto out_err;
-	r = fread(&root->size, 4, 1, root->file);
-	if (r != 1)
-		goto out_err;
-	r = fread(&root->array_size, 4, 1, root->file);
-	if (r != 1)
-		goto out_err;
-	r = bof_read(root, root->file, root->offset + root->size, 2);
-	if (r)
-		goto out_err;
-	return root;
-out_err:
-	bof_decref(root);
-	return NULL;
-}
-
-void bof_incref(bof_t *bof)
-{
-	bof->refcount++;
-}
-
-void bof_decref(bof_t *bof)
-{
-	unsigned i;
-
-	if (bof == NULL)
-		return;
-	if (--bof->refcount > 0)
-		return;
-	for (i = 0; i < bof->array_size; i++) {
-		bof_decref(bof->array[i]);
-		bof->array[i] = NULL;
-	}
-	bof->array_size = 0;
-	if (bof->file) {
-		fclose(bof->file);
-		bof->file = NULL;
-	}
-	free(bof->array);
-	free(bof->value);
-	free(bof);
-}
-
-static int bof_file_write(bof_t *bof, FILE *file)
-{
-	unsigned i;
-	int r;
-
-	r = fwrite(&bof->type, 4, 1, file);
-	if (r != 1)
-		return -EINVAL;
-	r = fwrite(&bof->size, 4, 1, file);
-	if (r != 1)
-		return -EINVAL;
-	r = fwrite(&bof->array_size, 4, 1, file);
-	if (r != 1)
-		return -EINVAL;
-	switch (bof->type) {
-	case BOF_TYPE_NULL:
-		if (bof->size)
-			return -EINVAL;
-		break;
-	case BOF_TYPE_STRING:
-	case BOF_TYPE_INT32:
-	case BOF_TYPE_BLOB:
-		r = fwrite(bof->value, bof->size - 12, 1, file);
-		if (r != 1)
-			return -EINVAL;
-		break;
-	case BOF_TYPE_OBJECT:
-	case BOF_TYPE_ARRAY:
-		for (i = 0; i < bof->array_size; i++) {
-			r = bof_file_write(bof->array[i], file);
-			if (r)
-				return r;
-		}
-		break;
-	default:
-		return -EINVAL;
-	}
-	return 0;
-}
-
-int bof_dump_file(bof_t *bof, const char *filename)
-{
-	unsigned i;
-	int r = 0;
-
-	if (bof->file) {
-		fclose(bof->file);
-		bof->file = NULL;
-	}
-	bof->file = fopen(filename, "w");
-	if (bof->file == NULL) {
-		fprintf(stderr, "%s failed to open file %s\n", __func__, filename);
-		r = -EINVAL;
-		goto out_err;
-	}
-	r = fseek(bof->file, 0L, SEEK_SET);
-	if (r) {
-		fprintf(stderr, "%s failed to seek into file %s\n", __func__, filename);
-		goto out_err;
-	}
-	r = fwrite(&bof->type, 4, 1, bof->file);
-	if (r != 1)
-		goto out_err;
-	r = fwrite(&bof->size, 4, 1, bof->file);
-	if (r != 1)
-		goto out_err;
-	r = fwrite(&bof->array_size, 4, 1, bof->file);
-	if (r != 1)
-		goto out_err;
-	for (i = 0; i < bof->array_size; i++) {
-		r = bof_file_write(bof->array[i], bof->file);
-		if (r)
-			return r;
-	}
-out_err:
-	fclose(bof->file);
-	bof->file = NULL;
-	return r;
-}
diff --git a/src/gallium/winsys/r600/drm/bof.h b/src/gallium/winsys/r600/drm/bof.h
deleted file mode 100644
index 014affb74f1..00000000000
--- a/src/gallium/winsys/r600/drm/bof.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *      Jerome Glisse
- */
-#ifndef BOF_H
-#define BOF_H
-
-#include <stdio.h>
-#include <stdint.h>
-
-#define BOF_TYPE_STRING		0
-#define BOF_TYPE_NULL		1
-#define BOF_TYPE_BLOB		2
-#define BOF_TYPE_OBJECT		3
-#define BOF_TYPE_ARRAY		4
-#define BOF_TYPE_INT32		5
-
-struct bof;
-
-typedef struct bof {
-	struct bof	**array;
-	unsigned	centry;
-	unsigned	nentry;
-	unsigned	refcount;
-	FILE		*file;
-	uint32_t	type;
-	uint32_t	size;
-	uint32_t	array_size;
-	void		*value;
-	long		offset;
-} bof_t;
-
-extern int bof_file_flush(bof_t *root);
-extern bof_t *bof_file_new(const char *filename);
-extern int bof_object_dump(bof_t *object, const char *filename);
-
-/* object */
-extern bof_t *bof_object(void);
-extern bof_t *bof_object_get(bof_t *object, const char *keyname);
-extern int bof_object_set(bof_t *object, const char *keyname, bof_t *value);
-/* array */
-extern bof_t *bof_array(void);
-extern int bof_array_append(bof_t *array, bof_t *value);
-extern bof_t *bof_array_get(bof_t *bof, unsigned i);
-extern unsigned bof_array_size(bof_t *bof);
-/* blob */
-extern bof_t *bof_blob(unsigned size, void *value);
-extern unsigned bof_blob_size(bof_t *bof);
-extern void *bof_blob_value(bof_t *bof);
-/* string */
-extern bof_t *bof_string(const char *value);
-/* int32 */
-extern bof_t *bof_int32(int32_t value);
-extern int32_t bof_int32_value(bof_t *bof);
-/* common functions */
-extern void bof_decref(bof_t *bof);
-extern void bof_incref(bof_t *bof);
-extern bof_t *bof_load_file(const char *filename);
-extern int bof_dump_file(bof_t *bof, const char *filename);
-extern void bof_print(bof_t *bof);
-
-static inline int bof_is_object(bof_t *bof){return (bof->type == BOF_TYPE_OBJECT);}
-static inline int bof_is_blob(bof_t *bof){return (bof->type == BOF_TYPE_BLOB);}
-static inline int bof_is_null(bof_t *bof){return (bof->type == BOF_TYPE_NULL);}
-static inline int bof_is_int32(bof_t *bof){return (bof->type == BOF_TYPE_INT32);}
-static inline int bof_is_array(bof_t *bof){return (bof->type == BOF_TYPE_ARRAY);}
-static inline int bof_is_string(bof_t *bof){return (bof->type == BOF_TYPE_STRING);}
-
-#endif
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
index 60d2e289396..5729fdd6326 100644
--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -32,7 +32,6 @@
 #include "r600.h"
 #include "evergreend.h"
 #include "radeon_drm.h"
-#include "bof.h"
 #include "pipe/p_compiler.h"
 #include "util/u_inlines.h"
 #include "util/u_memory.h"
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index 07bd544d1a0..f1150712b23 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -35,7 +35,6 @@
 #include "xf86drm.h"
 #include "radeon_drm.h"
 #include "r600_priv.h"
-#include "bof.h"
 #include "r600d.h"
 
 #define GROUP_FORCE_NEW_BLOCK	0
@@ -1615,88 +1614,6 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence_bo,
 	r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], fence_bo);
 }
 
-void r600_context_dump_bof(struct r600_context *ctx, const char *file)
-{
-	bof_t *bcs, *blob, *array, *bo, *size, *handle, *device_id, *root;
-	unsigned i;
-
-	root = device_id = bcs = blob = array = bo = size = handle = NULL;
-	root = bof_object();
-	if (root == NULL)
-		goto out_err;
-	device_id = bof_int32(ctx->radeon->device);
-	if (device_id == NULL)
-		goto out_err;
-	if (bof_object_set(root, "device_id", device_id))
-		goto out_err;
-	bof_decref(device_id);
-	device_id = NULL;
-	/* dump relocs */
-	blob = bof_blob(ctx->creloc * 16, ctx->reloc);
-	if (blob == NULL)
-		goto out_err;
-	if (bof_object_set(root, "reloc", blob))
-		goto out_err;
-	bof_decref(blob);
-	blob = NULL;
-	/* dump cs */
-	blob = bof_blob(ctx->pm4_cdwords * 4, ctx->pm4);
-	if (blob == NULL)
-		goto out_err;
-	if (bof_object_set(root, "pm4", blob))
-		goto out_err;
-	bof_decref(blob);
-	blob = NULL;
-	/* dump bo */
-	array = bof_array();
-	if (array == NULL)
-		goto out_err;
-	for (i = 0; i < ctx->creloc; i++) {
-		struct radeon_bo *rbo = ctx->bo[i];
-		bo = bof_object();
-		if (bo == NULL)
-			goto out_err;
-		size = bof_int32(rbo->size);
-		if (size == NULL)
-			goto out_err;
-		if (bof_object_set(bo, "size", size))
-			goto out_err;
-		bof_decref(size);
-		size = NULL;
-		handle = bof_int32(rbo->handle);
-		if (handle == NULL)
-			goto out_err;
-		if (bof_object_set(bo, "handle", handle))
-			goto out_err;
-		bof_decref(handle);
-		handle = NULL;
-		radeon_bo_map(ctx->radeon, rbo);
-		blob = bof_blob(rbo->size, rbo->data);
-		radeon_bo_unmap(ctx->radeon, rbo);
-		if (blob == NULL)
-			goto out_err;
-		if (bof_object_set(bo, "data", blob))
-			goto out_err;
-		bof_decref(blob);
-		blob = NULL;
-		if (bof_array_append(array, bo))
-			goto out_err;
-		bof_decref(bo);
-		bo = NULL;
-	}
-	if (bof_object_set(root, "bo", array))
-		goto out_err;
-	bof_dump_file(root, file);
-out_err:
-	bof_decref(blob);
-	bof_decref(array);
-	bof_decref(bo);
-	bof_decref(size);
-	bof_decref(handle);
-	bof_decref(device_id);
-	bof_decref(root);
-}
-
 static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait)
 {
 	unsigned results_base = query->results_start;

From e69dde5233a2fc6ad4c5483d079e1ea3a2123a59 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Sat, 23 Jul 2011 04:29:59 +0200
Subject: [PATCH 193/600] r600g: remove dummy function r600_bo_offset

Always returned 0.
---
 src/gallium/drivers/r600/evergreen_state.c    | 20 +++++++++----------
 src/gallium/drivers/r600/r600.h               |  7 ++-----
 src/gallium/drivers/r600/r600_state.c         | 18 ++++++++---------
 src/gallium/drivers/r600/r600_state_common.c  |  3 +--
 .../winsys/r600/drm/evergreen_hw_context.c    |  2 +-
 src/gallium/winsys/r600/drm/r600_hw_context.c | 12 +++++------
 6 files changed, 29 insertions(+), 33 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index bc6039dd40c..c9eaf94a2ae 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1023,8 +1023,8 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
 	rstate->val[1] = (S_030004_TEX_HEIGHT(texture->height0 - 1) |
 			  S_030004_TEX_DEPTH(texture->depth0 - 1) |
 			  S_030004_ARRAY_MODE(array_mode));
-	rstate->val[2] = (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8;
-	rstate->val[3] = (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8;
+	rstate->val[2] = tmp->offset[0] >> 8;
+	rstate->val[3] = tmp->offset[1] >> 8;
 	rstate->val[4] = (word4 |
 			  S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
 			  S_030010_ENDIAN_SWAP(endian) |
@@ -1354,7 +1354,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
 	/* FIXME handle enabling of CB beyond BASE8 which has different offset */
 	r600_pipe_state_add_reg(rstate,
 				R_028C60_CB_COLOR0_BASE + cb * 0x3C,
-				(offset +  r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
+				offset >> 8, 0xFFFFFFFF, bo[0]);
 	r600_pipe_state_add_reg(rstate,
 				R_028C78_CB_COLOR0_DIM + cb * 0x3C,
 				0x0, 0xFFFFFFFF, NULL);
@@ -1407,18 +1407,18 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
 	stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format);
 
 	r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE,
-				(offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+				offset >> 8, 0xFFFFFFFF, rbuffer->bo);
 	r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE,
-				(offset  + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+				offset >> 8, 0xFFFFFFFF, rbuffer->bo);
 
 	if (stencil_format) {
 		uint32_t stencil_offset;
 
 		stencil_offset = ((surf->aligned_height * rtex->pitch_in_bytes[level]) + 255) & ~255;
 		r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE,
-					(offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+					(offset + stencil_offset) >> 8, 0xFFFFFFFF, rbuffer->bo);
 		r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE,
-					(offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+					(offset + stencil_offset) >> 8, 0xFFFFFFFF, rbuffer->bo);
 	}
 
 	r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL);
@@ -2265,7 +2265,7 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
 
 	r600_pipe_state_add_reg(rstate,
 				R_028840_SQ_PGM_START_PS,
-				(r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo);
+				0, 0xFFFFFFFF, shader->bo);
 	r600_pipe_state_add_reg(rstate,
 				R_028844_SQ_PGM_RESOURCES_PS,
 				S_028844_NUM_GPRS(rshader->bc.ngpr) |
@@ -2339,7 +2339,7 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader
 				0x0, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate,
 			R_02885C_SQ_PGM_START_VS,
-			(r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo);
+			0, 0xFFFFFFFF, shader->bo);
 
 	r600_pipe_state_add_reg(rstate,
 				R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
@@ -2356,7 +2356,7 @@ void evergreen_fetch_shader(struct pipe_context *ctx,
 	r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS,
 				0x00000000, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS,
-				(r600_bo_offset(ve->fetch_shader)) >> 8,
+				0,
 				0xFFFFFFFF, ve->fetch_shader);
 }
 
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index d2b03418ede..2e759c79409 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -105,11 +105,8 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon,
 void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx);
 void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo);
 boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *pb_bo,
-				unsigned stride, struct winsys_handle *whandle);
-static INLINE unsigned r600_bo_offset(struct r600_bo *bo)
-{
-	return 0;
-}
+				  unsigned stride, struct winsys_handle *whandle);
+
 void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo);
 
 /* this relies on the pipe_reference being the first member of r600_bo */
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 1350a1cf565..487b1df0052 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1077,8 +1077,8 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
 	rstate->val[1] = (S_038004_TEX_HEIGHT(height - 1) |
 			  S_038004_TEX_DEPTH(depth - 1) |
 			  S_038004_DATA_FORMAT(format));
-	rstate->val[2] = (tmp->offset[offset_level] + r600_bo_offset(bo[0])) >> 8;
-	rstate->val[3] = (tmp->offset[offset_level+1] + r600_bo_offset(bo[1])) >> 8;
+	rstate->val[2] = tmp->offset[offset_level] >> 8;
+	rstate->val[3] = tmp->offset[offset_level+1] >> 8;
 	rstate->val[4] = (word4 |
 			  S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
 			  S_038010_REQUEST_SIZE(1) |
@@ -1441,7 +1441,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
 
 	r600_pipe_state_add_reg(rstate,
 				R_028040_CB_COLOR0_BASE + cb * 4,
-				(offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
+				offset >> 8, 0xFFFFFFFF, bo[0]);
 	r600_pipe_state_add_reg(rstate,
 				R_0280A0_CB_COLOR0_INFO + cb * 4,
 				color_info, 0xFFFFFFFF, bo[0]);
@@ -1455,10 +1455,10 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
 				0x00000000, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate,
 				R_0280E0_CB_COLOR0_FRAG + cb * 4,
-				r600_bo_offset(bo[1]) >> 8, 0xFFFFFFFF, bo[1]);
+				0, 0xFFFFFFFF, bo[1]);
 	r600_pipe_state_add_reg(rstate,
 				R_0280C0_CB_COLOR0_TILE + cb * 4,
-				r600_bo_offset(bo[2]) >> 8, 0xFFFFFFFF, bo[2]);
+				0, 0xFFFFFFFF, bo[2]);
 	r600_pipe_state_add_reg(rstate,
 				R_028100_CB_COLOR0_MASK + cb * 4,
 				0x00000000, 0xFFFFFFFF, NULL);
@@ -1492,7 +1492,7 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
 	format = r600_translate_dbformat(state->zsbuf->texture->format);
 
 	r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE,
-				(offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo);
+				offset >> 8, 0xFFFFFFFF, rbuffer->bo);
 	r600_pipe_state_add_reg(rstate, R_028000_DB_DEPTH_SIZE,
 				S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice),
 				0xFFFFFFFF, NULL);
@@ -2027,7 +2027,7 @@ void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shad
 	r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate,
 				R_028840_SQ_PGM_START_PS,
-				r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
+				0, 0xFFFFFFFF, shader->bo);
 	r600_pipe_state_add_reg(rstate,
 				R_028850_SQ_PGM_RESOURCES_PS,
 				S_028868_NUM_GPRS(rshader->bc.ngpr) |
@@ -2106,7 +2106,7 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad
 			0x00000000, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate,
 			R_028858_SQ_PGM_START_VS,
-			r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
+			0, 0xFFFFFFFF, shader->bo);
 
 	r600_pipe_state_add_reg(rstate,
 				R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
@@ -2127,7 +2127,7 @@ void r600_fetch_shader(struct pipe_context *ctx,
 	r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
 				0x00000000, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
-				r600_bo_offset(ve->fetch_shader) >> 8,
+				0,
 				0xFFFFFFFF, ve->fetch_shader);
 }
 
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 408eaed491b..9f3ab89fdf7 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -418,7 +418,6 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 	}
 
 	r600_upload_const_buffer(rctx, &rbuffer, &offset);
-	offset += r600_bo_offset(rbuffer->r.bo);
 
 	switch (shader) {
 	case PIPE_SHADER_VERTEX:
@@ -518,7 +517,7 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx)
 		}
 		if (vertex_buffer == NULL || rbuffer == NULL)
 			continue;
-		offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo);
+		offset += vertex_buffer->buffer_offset;
 
 		if (!rstate->id) {
 			if (rctx->chip_class >= EVERGREEN) {
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
index 5729fdd6326..98283ffbefc 100644
--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -1202,7 +1202,7 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
 	pm4[3] = draw->vgt_num_instances;
 	if (draw->indices) {
 	        pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing);
-		pm4[5] = draw->indices_bo_offset + r600_bo_offset(draw->indices);
+		pm4[5] = draw->indices_bo_offset;
 		pm4[6] = 0;
 		pm4[7] = draw->vgt_num_indices;
 		pm4[8] = draw->vgt_draw_initiator;
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index f1150712b23..35c086ae680 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -1468,7 +1468,7 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
 	pm4[3] = draw->vgt_num_instances;
 	if (draw->indices) {
 		pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing);
-		pm4[5] = draw->indices_bo_offset + r600_bo_offset(draw->indices);
+		pm4[5] = draw->indices_bo_offset;
 		pm4[6] = 0;
 		pm4[7] = draw->vgt_num_indices;
 		pm4[8] = draw->vgt_draw_initiator;
@@ -1710,14 +1710,14 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 	if (query->type == PIPE_QUERY_TIME_ELAPSED) {
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
-		ctx->pm4[ctx->pm4_cdwords++] = query->results_end + r600_bo_offset(query->buffer);
+		ctx->pm4[ctx->pm4_cdwords++] = query->results_end;
 		ctx->pm4[ctx->pm4_cdwords++] = (3 << 29);
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 	} else {
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
-		ctx->pm4[ctx->pm4_cdwords++] = query->results_end + r600_bo_offset(query->buffer);
+		ctx->pm4[ctx->pm4_cdwords++] = query->results_end;
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 	}
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
@@ -1735,14 +1735,14 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query)
 	if (query->type == PIPE_QUERY_TIME_ELAPSED) {
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
-		ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8 + r600_bo_offset(query->buffer);
+		ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8;
 		ctx->pm4[ctx->pm4_cdwords++] = (3 << 29);
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 	} else {
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
-		ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8 + r600_bo_offset(query->buffer);
+		ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8;
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 	}
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
@@ -1789,7 +1789,7 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query,
 		/* emit predicate packets for all data blocks */
 		while (results_base != query->results_end) {
 			ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
-			ctx->pm4[ctx->pm4_cdwords++] = results_base + r600_bo_offset(query->buffer);
+			ctx->pm4[ctx->pm4_cdwords++] = results_base;
 			ctx->pm4[ctx->pm4_cdwords++] = op;
 			ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
 			ctx->pm4[ctx->pm4_cdwords++] = 0;

From 0f1aae3ae7cef051f87dae056c46fcfd0afaab20 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 1 Aug 2011 16:06:59 -0700
Subject: [PATCH 194/600] intel: Fix unused variable warning.

---
 src/mesa/drivers/dri/intel/intel_fbo.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index e48d6ef9cbd..65ad621e770 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -596,7 +596,6 @@ intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb,
 				   struct intel_texture_image *intel_image,
 				   int zoffset)
 {
-   struct intel_mipmap_tree *mt = intel_image->mt;
    unsigned int dst_x, dst_y;
 
    /* compute offset of the particular 2D image within the texture region */

From 7cf799d47269ce01d3e5981709744a16b7c2756c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 2 Aug 2011 13:36:57 -0700
Subject: [PATCH 195/600] radeon: Remove set-but-unused color_mask variable.

This has been around since the initial import in 2003 and never used.
---
 src/mesa/drivers/dri/r200/r200_ioctl.c     | 3 ---
 src/mesa/drivers/dri/radeon/radeon_ioctl.c | 3 ---
 2 files changed, 6 deletions(-)

diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c
index 02201cb53d6..44a794da396 100644
--- a/src/mesa/drivers/dri/r200/r200_ioctl.c
+++ b/src/mesa/drivers/dri/r200/r200_ioctl.c
@@ -185,7 +185,6 @@ static void r200Clear( struct gl_context *ctx, GLbitfield mask )
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
    __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
    GLuint flags = 0;
-   GLuint color_mask = 0;
    GLuint orig_mask = mask;
 
    if ( R200_DEBUG & RADEON_IOCTL ) {
@@ -206,13 +205,11 @@ static void r200Clear( struct gl_context *ctx, GLbitfield mask )
 
    if ( mask & BUFFER_BIT_FRONT_LEFT ) {
       flags |= RADEON_FRONT;
-      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
       mask &= ~BUFFER_BIT_FRONT_LEFT;
    }
 
    if ( mask & BUFFER_BIT_BACK_LEFT ) {
       flags |= RADEON_BACK;
-      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
       mask &= ~BUFFER_BIT_BACK_LEFT;
    }
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
index a91d8727792..c23e9c2d2a2 100644
--- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
@@ -560,7 +560,6 @@ static void radeonClear( struct gl_context *ctx, GLbitfield mask )
    r100ContextPtr rmesa = R100_CONTEXT(ctx);
    __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
    GLuint flags = 0;
-   GLuint color_mask = 0;
    GLuint orig_mask = mask;
 
    if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
@@ -582,13 +581,11 @@ static void radeonClear( struct gl_context *ctx, GLbitfield mask )
 
    if ( mask & BUFFER_BIT_FRONT_LEFT ) {
       flags |= RADEON_FRONT;
-      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
       mask &= ~BUFFER_BIT_FRONT_LEFT;
    }
 
    if ( mask & BUFFER_BIT_BACK_LEFT ) {
       flags |= RADEON_BACK;
-      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
       mask &= ~BUFFER_BIT_BACK_LEFT;
    }
 

From 25fffa9364baef76a7e7e875be1fb3c4f10aadfd Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 2 Aug 2011 13:39:43 -0700
Subject: [PATCH 196/600] radeon: Remove set-but-unused log2depth variable.

r100 doesn't support 3D GL_EXT_texture3D.
---
 src/mesa/drivers/dri/radeon/radeon_texstate.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c
index 9ba98e303a7..3abaa1504a4 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texstate.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
@@ -1018,7 +1018,7 @@ static GLboolean radeon_validate_texgen( struct gl_context *ctx, GLuint unit )
 static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int unit)
 {
    const struct gl_texture_image *firstImage;
-   GLint log2Width, log2Height, log2Depth, texelBytes;
+   GLint log2Width, log2Height, texelBytes;
 
    if ( t->bo ) {
 	return GL_TRUE;
@@ -1033,7 +1033,6 @@ static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int
 
    log2Width  = firstImage->WidthLog2;
    log2Height = firstImage->HeightLog2;
-   log2Depth  = firstImage->DepthLog2;
    texelBytes = _mesa_get_format_bytes(firstImage->TexFormat);
 
    if (!t->image_override) {

From f5e612ab594689c7736f8af082e88c107bd7582c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 2 Aug 2011 13:41:59 -0700
Subject: [PATCH 197/600] radeon: Remove set-but-unused variables in
 radeonSetTexBuffer2() variants.

These have been unused since 2009.
---
 src/mesa/drivers/dri/r200/r200_texstate.c     | 6 ------
 src/mesa/drivers/dri/r300/r300_texstate.c     | 5 -----
 src/mesa/drivers/dri/r600/evergreen_tex.c     | 7 -------
 src/mesa/drivers/dri/r600/r600_texstate.c     | 5 -----
 src/mesa/drivers/dri/radeon/radeon_texstate.c | 6 ------
 5 files changed, 29 deletions(-)

diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c
index 7adf9ad73ed..8c9bd6d00b2 100644
--- a/src/mesa/drivers/dri/r200/r200_texstate.c
+++ b/src/mesa/drivers/dri/r200/r200_texstate.c
@@ -773,18 +773,12 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format
 	struct radeon_renderbuffer *rb;
 	radeon_texture_image *rImage;
 	radeonContextPtr radeon;
-	r200ContextPtr rmesa;
 	struct radeon_framebuffer *rfb;
 	radeonTexObjPtr t;
 	uint32_t pitch_val;
-	uint32_t internalFormat, format;
 	gl_format texFormat;
 
-	format = GL_UNSIGNED_BYTE;
-	internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
-
 	radeon = pDRICtx->driverPrivate;
-	rmesa = pDRICtx->driverPrivate;
 
 	rfb = dPriv->driverPrivate;
         texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
index e24ad6f088d..e4388a021ed 100644
--- a/src/mesa/drivers/dri/r300/r300_texstate.c
+++ b/src/mesa/drivers/dri/r300/r300_texstate.c
@@ -427,13 +427,8 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format
 	struct radeon_framebuffer *rfb;
 	radeonTexObjPtr t;
 	uint32_t pitch_val;
-	uint32_t internalFormat, type, format;
 	gl_format texFormat;
 
-	type = GL_BGRA;
-	format = GL_UNSIGNED_BYTE;
-	internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
-
 	radeon = pDRICtx->driverPrivate;
 	rmesa = pDRICtx->driverPrivate;
 
diff --git a/src/mesa/drivers/dri/r600/evergreen_tex.c b/src/mesa/drivers/dri/r600/evergreen_tex.c
index 9784a8484f2..d240a216817 100644
--- a/src/mesa/drivers/dri/r600/evergreen_tex.c
+++ b/src/mesa/drivers/dri/r600/evergreen_tex.c
@@ -1288,19 +1288,12 @@ void evergreenSetTexBuffer(__DRIcontext *pDRICtx, GLint target, GLint glx_textur
 	struct radeon_renderbuffer *rb;
 	radeon_texture_image *rImage;
 	radeonContextPtr radeon;
-	context_t *rmesa;
 	struct radeon_framebuffer *rfb;
 	radeonTexObjPtr t;
 	uint32_t pitch_val;
-	uint32_t internalFormat, type, format;
 	gl_format texFormat;
 
-	type = GL_BGRA;
-	format = GL_UNSIGNED_BYTE;
-	internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
-
 	radeon = pDRICtx->driverPrivate;
-	rmesa = pDRICtx->driverPrivate;
 
 	rfb = dPriv->driverPrivate;
         texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c
index 949db29c189..65fae7195fd 100644
--- a/src/mesa/drivers/dri/r600/r600_texstate.c
+++ b/src/mesa/drivers/dri/r600/r600_texstate.c
@@ -1141,13 +1141,8 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo
 	struct radeon_framebuffer *rfb;
 	radeonTexObjPtr t;
 	uint32_t pitch_val;
-	uint32_t internalFormat, type, format;
         gl_format texFormat;
 
-	type = GL_BGRA;
-	format = GL_UNSIGNED_BYTE;
-	internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
-
 	radeon = pDRICtx->driverPrivate;
 	rmesa = pDRICtx->driverPrivate;
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c
index 3abaa1504a4..430309392a0 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texstate.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
@@ -648,18 +648,12 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_form
 	struct radeon_renderbuffer *rb;
 	radeon_texture_image *rImage;
 	radeonContextPtr radeon;
-	r100ContextPtr rmesa;
 	struct radeon_framebuffer *rfb;
 	radeonTexObjPtr t;
 	uint32_t pitch_val;
-	uint32_t internalFormat, format;
 	gl_format texFormat;
 
-	format = GL_UNSIGNED_BYTE;
-	internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? GL_RGB : GL_RGBA);
-
 	radeon = pDRICtx->driverPrivate;
-	rmesa = pDRICtx->driverPrivate;
 
 	rfb = dPriv->driverPrivate;
         texUnit = _mesa_get_current_tex_unit(radeon->glCtx);

From 8de1d42f244f6315c471b01ef52a61f61d227c6d Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 2 Aug 2011 13:47:18 -0700
Subject: [PATCH 198/600] radeon: Remove set-but-unused variables in
 radeon_lock.c

These have been unused since this function's introduction in the FBO
support development around 2009.
---
 src/mesa/drivers/dri/radeon/radeon_lock.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c
index 7b6bd36dcf7..ae8a212f806 100644
--- a/src/mesa/drivers/dri/radeon/radeon_lock.c
+++ b/src/mesa/drivers/dri/radeon/radeon_lock.c
@@ -114,16 +114,6 @@ void radeon_lock_hardware(radeonContextPtr radeon
 		)
 {
 	char ret = 0;
-	struct radeon_framebuffer *rfb = NULL;
-	struct radeon_renderbuffer *rrb = NULL;
-
-	if (radeon_get_drawable(radeon)) {
-		rfb = radeon_get_drawable(radeon)->driverPrivate;
-
-		if (rfb)
-			rrb = radeon_get_renderbuffer(&rfb->base,
-						      rfb->base._ColorDrawBufferIndexes[0]);
-	}
 
 	if (!radeon->radeonScreen->driScreen->dri2.enabled) {
 		if (ATOMIC_INC_AND_FETCH(radeon->dri.hwLockCount) > 1)

From e0e4c2e30552e524c91b2eb98a2dabdcd4666169 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 2 Aug 2011 13:49:05 -0700
Subject: [PATCH 199/600] radeon: Remove some remaining set-but-unused
 variables.

These looked more like copy-and-paste to me than the others (which
looked more like possibly someone forgot to write some code in a
refactor), so I didn't verify where they came from.
---
 src/mesa/drivers/dri/r600/r600_cmdbuf.c             | 2 --
 src/mesa/drivers/dri/radeon/radeon_common.c         | 8 --------
 src/mesa/drivers/dri/radeon/radeon_common_context.c | 2 --
 src/mesa/drivers/dri/radeon/radeon_cs_legacy.c      | 2 --
 4 files changed, 14 deletions(-)

diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
index ce2f7779563..74f048b1062 100644
--- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c
+++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
@@ -259,13 +259,11 @@ static int r600_cs_process_relocs(struct radeon_cs_int *csi,
                                   uint32_t * reloc_chunk,
                                   uint32_t * length_dw_reloc_chunk) 
 {
-    struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm;
     struct r600_cs_reloc_legacy *relocs;
     int i, j, r;
 
     uint32_t offset_dw = 0;
 
-    csm = (struct r600_cs_manager_legacy*)csi->csm;
     relocs = (struct r600_cs_reloc_legacy *)csi->relocs;
 restart:
     for (i = 0; i < csi->crelocs; i++) {
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c
index bfc307ca987..e7a6623cf84 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common.c
@@ -436,7 +436,6 @@ void radeonCopyBuffer( __DRIdrawable *dPriv,
 		       const drm_clip_rect_t	  *rect)
 {
 	radeonContextPtr rmesa;
-	struct radeon_framebuffer *rfb;
 	GLint nbox, i, ret;
 
 	assert(dPriv);
@@ -447,8 +446,6 @@ void radeonCopyBuffer( __DRIdrawable *dPriv,
 
 	LOCK_HARDWARE(rmesa);
 
-	rfb = dPriv->driverPrivate;
-
 	if ( RADEON_DEBUG & RADEON_IOCTL ) {
 		fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx );
 	}
@@ -527,8 +524,6 @@ static GLboolean radeonPageFlip( __DRIdrawable *dPriv )
 {
 	radeonContextPtr radeon;
 	GLint ret;
-	__DRIscreen *psp;
-	struct radeon_renderbuffer *rrb;
 	struct radeon_framebuffer *rfb;
 
 	assert(dPriv);
@@ -537,9 +532,6 @@ static GLboolean radeonPageFlip( __DRIdrawable *dPriv )
 
 	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
 	rfb = dPriv->driverPrivate;
-	rrb = (void *)rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
-
-	psp = dPriv->driScreenPriv;
 
 	LOCK_HARDWARE(radeon);
 
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
index bf8925f61d0..c08b79484af 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -515,7 +515,6 @@ void radeon_prepare_render(radeonContextPtr radeon)
     __DRIcontext *driContext = radeon->dri.context;
     __DRIdrawable *drawable;
     __DRIscreen *screen;
-    struct radeon_framebuffer *draw;
 
     screen = driContext->driScreenPriv;
     if (!screen->dri2.loader)
@@ -527,7 +526,6 @@ void radeon_prepare_render(radeonContextPtr radeon)
 	    radeon_update_renderbuffers(driContext, drawable, GL_FALSE);
 
 	/* Intel driver does the equivalent of this, no clue if it is needed:*/
-	draw = drawable->driverPrivate;
 	radeon_draw_buffer(radeon->glCtx, radeon->glCtx->DrawBuffer);
 
 	driContext->dri2.draw_stamp = drawable->dri2.stamp;
diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
index c2722a4e195..5595b705b15 100644
--- a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
+++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
@@ -218,11 +218,9 @@ static int cs_end(struct radeon_cs_int *cs,
 
 static int cs_process_relocs(struct radeon_cs_int *cs)
 {
-    struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
     struct cs_reloc_legacy *relocs;
     int i, j, r;
 
-    csm = (struct cs_manager_legacy*)cs->csm;
     relocs = (struct cs_reloc_legacy *)cs->relocs;
 restart:
     for (i = 0; i < cs->crelocs; i++) 

From b5e39405831092d8cf7943318c92b750325eb31e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Wed, 3 Aug 2011 01:13:06 +0200
Subject: [PATCH 200/600] util: fix a typo in util_format_swizzle_4f

Reported by Gustaw Smolarczyk.
---
 src/gallium/auxiliary/util/u_format.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c
index 3a8aeab5fed..34922ab18ab 100644
--- a/src/gallium/auxiliary/util/u_format.c
+++ b/src/gallium/auxiliary/util/u_format.c
@@ -409,7 +409,7 @@ void util_format_swizzle_4f(float *dst, const float *src,
    unsigned i;
 
    for (i = 0; i < 4; i++) {
-      if (swz[i] < UTIL_FORMAT_SWIZZLE_W)
+      if (swz[i] <= UTIL_FORMAT_SWIZZLE_W)
          dst[i] = src[swz[i]];
       else if (swz[i] == UTIL_FORMAT_SWIZZLE_0)
          dst[i] = 0;

From 2664980760c5cf2e7dde4065f9cc8e8b865627c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Wed, 3 Aug 2011 00:52:55 +0200
Subject: [PATCH 201/600] winsys/radeon: remove dummy function pb_buffer

---
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 17 +++++++----------
 src/gallium/winsys/radeon/drm/radeon_drm_bo.h |  6 ------
 2 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 796262ccfdb..2eb9d134407 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -89,7 +89,7 @@ static struct radeon_bo *get_radeon_bo(struct pb_buffer *_buf)
 
 static void radeon_bo_wait(struct pb_buffer *_buf)
 {
-    struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf));
+    struct radeon_bo *bo = get_radeon_bo(_buf);
     struct drm_radeon_gem_wait_idle args = {};
 
     while (p_atomic_read(&bo->num_active_ioctls)) {
@@ -105,7 +105,7 @@ static void radeon_bo_wait(struct pb_buffer *_buf)
 
 static boolean radeon_bo_is_busy(struct pb_buffer *_buf)
 {
-    struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf));
+    struct radeon_bo *bo = get_radeon_bo(_buf);
     struct drm_radeon_gem_busy args = {};
     boolean busy;
 
@@ -395,16 +395,14 @@ static void *radeon_bo_map(struct pb_buffer *buf,
                            struct radeon_winsys_cs *cs,
                            enum pipe_transfer_usage usage)
 {
-    struct pb_buffer *_buf = pb_buffer(buf);
-
-    return pb_map(_buf, get_pb_usage_from_transfer_flags(usage), cs);
+    return pb_map(buf, get_pb_usage_from_transfer_flags(usage), cs);
 }
 
 static void radeon_bo_get_tiling(struct pb_buffer *_buf,
                                  enum radeon_bo_layout *microtiled,
                                  enum radeon_bo_layout *macrotiled)
 {
-    struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf));
+    struct radeon_bo *bo = get_radeon_bo(_buf);
     struct drm_radeon_gem_set_tiling args = {};
 
     args.handle = bo->handle;
@@ -429,7 +427,7 @@ static void radeon_bo_set_tiling(struct pb_buffer *_buf,
                                  enum radeon_bo_layout macrotiled,
                                  uint32_t pitch)
 {
-    struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf));
+    struct radeon_bo *bo = get_radeon_bo(_buf);
     struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
     struct drm_radeon_gem_set_tiling args = {};
 
@@ -464,8 +462,7 @@ static struct radeon_winsys_cs_handle *radeon_drm_get_cs_handle(
         struct pb_buffer *_buf)
 {
     /* return radeon_bo. */
-    return (struct radeon_winsys_cs_handle*)
-            get_radeon_bo(pb_buffer(_buf));
+    return (struct radeon_winsys_cs_handle*)get_radeon_bo(_buf);
 }
 
 static unsigned get_pb_usage_from_create_flags(enum radeon_bo_domain domain)
@@ -586,7 +583,7 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
                                            struct winsys_handle *whandle)
 {
     struct drm_gem_flink flink = {};
-    struct radeon_bo *bo = get_radeon_bo(pb_buffer(buffer));
+    struct radeon_bo *bo = get_radeon_bo(buffer);
 
     if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
         if (!bo->flinked) {
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
index b94881bc4ce..f4ea73a2210 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
@@ -80,10 +80,4 @@ void radeon_bo_reference(struct radeon_bo **dst, struct radeon_bo *src)
     pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
 }
 
-static INLINE struct pb_buffer *
-pb_buffer(struct pb_buffer *buffer)
-{
-    return (struct pb_buffer *)buffer;
-}
-
 #endif

From 6eb94fc3444a300a0419c40cfcf356fdd88bc304 Mon Sep 17 00:00:00 2001
From: Vadim Girlin <vadimgirlin@gmail.com>
Date: Wed, 3 Aug 2011 01:04:19 +0400
Subject: [PATCH 202/600] r600g: use backend mask for occlusion queries

Use backend_map kernel query if supported, otherwise analyze ZPASS_DONE
results to get the mask.

Fixes lockups with predicated rendering due to incorrect query buffer
initialization on some cards.

Note: this is a candidate for the 7.11 branch.

Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/r600/r600.h               |  4 +
 .../winsys/r600/drm/evergreen_hw_context.c    |  2 +
 src/gallium/winsys/r600/drm/r600_drm.c        | 59 ++++++++++++
 src/gallium/winsys/r600/drm/r600_hw_context.c | 96 ++++++++++++++++++-
 src/gallium/winsys/r600/drm/r600_priv.h       |  3 +
 5 files changed, 160 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 2e759c79409..2ac5ed465c1 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -94,6 +94,8 @@ struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon);
 unsigned r600_get_clock_crystal_freq(struct radeon *radeon);
 unsigned r600_get_minor_version(struct radeon *radeon);
 unsigned r600_get_num_backends(struct radeon *radeon);
+unsigned r600_get_num_tile_pipes(struct radeon *radeon);
+unsigned r600_get_backend_map(struct radeon *radeon);
 
 /* r600_bo.c */
 struct r600_bo;
@@ -258,6 +260,7 @@ struct r600_context {
 	u32			*pm4;
 	struct list_head	query_list;
 	unsigned		num_query_running;
+	unsigned		backend_mask;
 	struct list_head	fenced_bo;
 	unsigned                max_db; /* for OQ */
 	unsigned                num_dest_buffers;
@@ -279,6 +282,7 @@ struct r600_draw {
 	struct r600_bo		*indices;
 };
 
+void r600_get_backend_mask(struct r600_context *ctx);
 int r600_context_init(struct r600_context *ctx, struct radeon *radeon);
 void r600_context_fini(struct r600_context *ctx);
 void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state);
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
index 98283ffbefc..7fe2050cd84 100644
--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -1018,6 +1018,8 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon)
 
 	LIST_INITHEAD(&ctx->fenced_bo);
 
+	r600_get_backend_mask(ctx);
+
 	return 0;
 out_err:
 	r600_context_fini(ctx);
diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c
index ab0afea5bf5..8aa8c3df52a 100644
--- a/src/gallium/winsys/r600/drm/r600_drm.c
+++ b/src/gallium/winsys/r600/drm/r600_drm.c
@@ -50,6 +50,14 @@
 #define RADEON_INFO_NUM_BACKENDS 0xa
 #endif
 
+#ifndef RADEON_INFO_NUM_TILE_PIPES
+#define RADEON_INFO_NUM_TILE_PIPES 0xb
+#endif
+
+#ifndef RADEON_INFO_BACKEND_MAP
+#define RADEON_INFO_BACKEND_MAP 0xd
+#endif
+
 enum radeon_family r600_get_family(struct radeon *r600)
 {
 	return r600->family;
@@ -75,6 +83,16 @@ unsigned r600_get_num_backends(struct radeon *radeon)
 	return radeon->num_backends;
 }
 
+unsigned r600_get_num_tile_pipes(struct radeon *radeon)
+{
+	return radeon->num_tile_pipes;
+}
+
+unsigned r600_get_backend_map(struct radeon *radeon)
+{
+	return radeon->backend_map;
+}
+
 unsigned r600_get_minor_version(struct radeon *radeon)
 {
 	return radeon->minor_version;
@@ -241,6 +259,42 @@ static int radeon_get_num_backends(struct radeon *radeon)
 	return 0;
 }
 
+static int radeon_get_num_tile_pipes(struct radeon *radeon)
+{
+	struct drm_radeon_info info = {};
+	uint32_t num_tile_pipes = 0;
+	int r;
+
+	info.request = RADEON_INFO_NUM_TILE_PIPES;
+	info.value = (uintptr_t)&num_tile_pipes;
+	r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info,
+			sizeof(struct drm_radeon_info));
+	if (r)
+		return r;
+
+	radeon->num_tile_pipes = num_tile_pipes;
+	return 0;
+}
+
+static int radeon_get_backend_map(struct radeon *radeon)
+{
+	struct drm_radeon_info info = {};
+	uint32_t backend_map = 0;
+	int r;
+
+	info.request = RADEON_INFO_BACKEND_MAP;
+	info.value = (uintptr_t)&backend_map;
+	r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info,
+			sizeof(struct drm_radeon_info));
+	if (r)
+		return r;
+
+	radeon->backend_map = backend_map;
+	radeon->backend_map_valid = TRUE;
+
+	return 0;
+}
+
 
 static int radeon_init_fence(struct radeon *radeon)
 {
@@ -362,6 +416,11 @@ static struct radeon *radeon_new(int fd, unsigned device)
 	if (radeon->minor_version >= 9)
 		radeon_get_num_backends(radeon);
 
+	if (radeon->minor_version >= 11) {
+		radeon_get_num_tile_pipes(radeon);
+		radeon_get_backend_map(radeon);
+	}
+
 	radeon->bomgr = r600_bomgr_create(radeon, 1000000);
 	if (radeon->bomgr == NULL) {
 		return NULL;
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index 35c086ae680..30af4e8066f 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -39,6 +39,91 @@
 
 #define GROUP_FORCE_NEW_BLOCK	0
 
+/* Get backends mask */
+void r600_get_backend_mask(struct r600_context *ctx)
+{
+	struct r600_bo * buffer;
+	u32 * results;
+	unsigned num_backends = r600_get_num_backends(ctx->radeon);
+	unsigned i, mask = 0;
+
+	/* if backend_map query is supported by the kernel */
+	if (ctx->radeon->backend_map_valid) {
+		unsigned num_tile_pipes = r600_get_num_tile_pipes(ctx->radeon);
+		unsigned backend_map = r600_get_backend_map(ctx->radeon);
+		unsigned item_width, item_mask;
+
+		if (ctx->radeon->chip_class >= EVERGREEN) {
+			item_width = 4;
+			item_mask = 0x7;
+		} else {
+			item_width = 2;
+			item_mask = 0x3;
+		}
+
+		while(num_tile_pipes--) {
+			i = backend_map & item_mask;
+			mask |= (1<<i);
+			backend_map >>= item_width;
+		}
+		if (mask != 0) {
+			ctx->backend_mask = mask;
+			return;
+		}
+	}
+
+	/* otherwise backup path for older kernels */
+
+	/* create buffer for event data */
+	buffer = r600_bo(ctx->radeon, ctx->max_db*16, 1, 0,
+				PIPE_USAGE_STAGING);
+	if (!buffer)
+		goto err;
+
+	/* initialize buffer with zeroes */
+	results = r600_bo_map(ctx->radeon, buffer, PB_USAGE_CPU_WRITE, NULL);
+	if (results) {
+		memset(results, 0, ctx->max_db * 4 * 4);
+		r600_bo_unmap(ctx->radeon, buffer);
+
+		/* emit EVENT_WRITE for ZPASS_DONE */
+		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
+		ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
+		ctx->pm4[ctx->pm4_cdwords++] = 0;
+		ctx->pm4[ctx->pm4_cdwords++] = 0;
+
+		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
+		ctx->pm4[ctx->pm4_cdwords++] = 0;
+		r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], buffer);
+
+		/* execute */
+		r600_context_flush(ctx);
+
+		/* analyze results */
+		results = r600_bo_map(ctx->radeon, buffer, PB_USAGE_CPU_READ, NULL);
+		if (results) {
+			for(i = 0; i < ctx->max_db; i++) {
+				/* at least highest bit will be set if backend is used */
+				if (results[i*4 + 1])
+					mask |= (1<<i);
+			}
+			r600_bo_unmap(ctx->radeon, buffer);
+		}
+	}
+
+	r600_bo_reference(ctx->radeon, &buffer, NULL);
+
+	if (mask != 0) {
+		ctx->backend_mask = mask;
+		return;
+	}
+
+err:
+	/* fallback to old method - set num_backends lower bits to 1 */
+	ctx->backend_mask = (~((u32)0))>>(32-num_backends);
+	return;
+}
+
 static inline void r600_context_ps_partial_flush(struct r600_context *ctx)
 {
 	if (!(ctx->flags & R600_CONTEXT_DRAW_PENDING))
@@ -898,6 +983,8 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon)
 
 	ctx->max_db = 4;
 
+	r600_get_backend_mask(ctx);
+
 	return 0;
 out_err:
 	r600_context_fini(ctx);
@@ -1652,7 +1739,6 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu
 void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 {
 	unsigned required_space, new_results_end;
-	int num_backends = r600_get_num_backends(ctx->radeon);
 
 	/* query request needs 6/8 dwords for begin + 6/8 dwords for end */
 	if (query->type == PIPE_QUERY_TIME_ELAPSED)
@@ -1698,9 +1784,11 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 			memset(results, 0, query->result_size);
 
 			/* Set top bits for unused backends */
-			for (i = num_backends; i < ctx->max_db; i++) {
-				results[(i * 4)+1] = 0x80000000;
-				results[(i * 4)+3] = 0x80000000;
+			for (i = 0; i < ctx->max_db; i++) {
+				if (!(ctx->backend_mask & (1<<i))) {
+					results[(i * 4)+1] = 0x80000000;
+					results[(i * 4)+3] = 0x80000000;
+				}
 			}
 			r600_bo_unmap(ctx->radeon, query->buffer);
 		}
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 69f7251c043..75115fdaed7 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -55,6 +55,9 @@ struct radeon {
 	struct r600_bo			*fence_bo;
 	unsigned			clock_crystal_freq;
 	unsigned			num_backends;
+	unsigned			num_tile_pipes;
+	unsigned			backend_map;
+	boolean				backend_map_valid;
 	unsigned                        minor_version;
 
         /* List of buffer handles and its mutex. */

From 2bde0cc95d8db10b6d2c6689ca39c196a81248b0 Mon Sep 17 00:00:00 2001
From: Vadim Girlin <vadimgirlin@gmail.com>
Date: Wed, 3 Aug 2011 15:35:02 +0400
Subject: [PATCH 203/600] r600g: take into account force_add_cf in pops

When we have two ENDIFs in a row, we shouldn't modify the pop_count
for the same alu clause twice.

Fixes https://bugs.freedesktop.org/show_bug.cgi?id=38163

Note: this is a candidate for the 7.11 branch.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/r600/r600_shader.c | 37 ++++++++++++++++----------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index fc56656f55d..c55cdd707eb 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -2932,25 +2932,34 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
 
 static int pops(struct r600_shader_ctx *ctx, int pops)
 {
-	int alu_pop = 3;
-	if (ctx->bc->cf_last) {
-		if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
-			alu_pop = 0;
-		else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
-			alu_pop = 1;
+	unsigned force_pop = ctx->bc->force_add_cf;
+
+	if (!force_pop) {
+		int alu_pop = 3;
+		if (ctx->bc->cf_last) {
+			if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
+				alu_pop = 0;
+			else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
+				alu_pop = 1;
+		}
+		alu_pop += pops;
+		if (alu_pop == 1) {
+			ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
+			ctx->bc->force_add_cf = 1;
+		} else if (alu_pop == 2) {
+			ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
+			ctx->bc->force_add_cf = 1;
+		} else {
+			force_pop = 1;
+		}
 	}
-	alu_pop += pops;
-	if (alu_pop == 1) {
-		ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
-		ctx->bc->force_add_cf = 1;
-	} else if (alu_pop == 2) {
-		ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
-		ctx->bc->force_add_cf = 1;
-	} else {
+
+	if (force_pop) {
 		r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
 		ctx->bc->cf_last->pop_count = pops;
 		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
 	}
+
 	return 0;
 }
 

From babb26776fadb683be9dacb492efcdc455b176ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Thu, 4 Aug 2011 03:23:12 +0200
Subject: [PATCH 204/600] r600g: remove more of unused code

This is a follow-up to f6df430a85141f6a384c18079fb5b2ad848dac0d.
---
 src/gallium/drivers/r600/r600_pipe.c | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 615f0688eb3..5159ba350e4 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -119,22 +119,9 @@ static void r600_flush(struct pipe_context *ctx,
 	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
 	struct r600_fence **rfence = (struct r600_fence**)fence;
 
-#if 0
-	static int dc = 0;
-	char dname[256];
-#endif
-
 	if (rfence)
 		*rfence = r600_create_fence(rctx);
 
-#if 0
-	sprintf(dname, "gallium-%08d.bof", dc);
-	if (dc < 20) {
-		r600_context_dump_bof(&rctx->ctx, dname);
-		R600_ERR("dumped %s\n", dname);
-	}
-	dc++;
-#endif
 	r600_context_flush(&rctx->ctx);
 }
 

From eeed782ecb9fa92a958cb650c0a5a536556dc611 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?RALOVICH=2C=20Krist=C3=B3f?= <tade60@freemail.hu>
Date: Sun, 31 Jul 2011 23:49:43 +0200
Subject: [PATCH 205/600] gbm/dri: avoid crash in dri_screen_create

---
 src/gbm/backends/dri/gbm_dri.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
index 6bb7848d830..9de8cb61162 100644
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -194,6 +194,8 @@ dri_screen_create(struct gbm_dri_device *dri)
    dri->screen = dri->dri2->createNewScreen(0, dri->base.base.fd,
                                             dri->extensions,
                                             &dri->driver_configs, dri);
+   if (dri->screen == NULL)
+      return -1;
 
    extensions = dri->core->getExtensions(dri->screen);
    if (dri_bind_extensions(dri, dri_core_extensions, extensions) < 0) {

From ca6bbfd76960731926c99d0b6257b42344596794 Mon Sep 17 00:00:00 2001
From: Benjamin Franzke <benjaminfranzke@googlemail.com>
Date: Thu, 4 Aug 2011 13:37:42 +0200
Subject: [PATCH 206/600] gbm: link gbm_gallium_drm.so against math library
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This avoids the following runtime error with EGL on platforms that
require linking with libm for nontrivial math functions:

failed to load module: /xorg/lib64/gbm/gbm_gallium_drm.so: undefined
symbol: powf

(Based on Kristóf RALOVICHs patch and Ian's suggestions in
http://lists.freedesktop.org/archives/mesa-dev/2011-August/010036.html)
---
 src/gallium/targets/gbm/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/targets/gbm/Makefile b/src/gallium/targets/gbm/Makefile
index 3ad3eca1d13..b38782c4070 100644
--- a/src/gallium/targets/gbm/Makefile
+++ b/src/gallium/targets/gbm/Makefile
@@ -15,7 +15,7 @@ GBM_INCLUDES = \
 	       -I$(TOP)/src/gallium/auxiliary \
 	       -I$(TOP)/src/gallium/include \
 
-GBM_LIBS = $(LIBUDEV_LIBS) $(LIBDRM_LIB) \
+GBM_LIBS = $(LIBUDEV_LIBS) $(LIBDRM_LIB) -lm \
 	   $(TOP)/src/gallium/state_trackers/gbm/libgbm.a \
 	   $(TOP)/src/gallium/drivers/identity/libidentity.a \
 	   $(TOP)/src/gallium/drivers/galahad/libgalahad.a \

From 32f4cf38085e4056b8e4a9fc78fea28897a1d05f Mon Sep 17 00:00:00 2001
From: Benjamin Franzke <benjaminfranzke@googlemail.com>
Date: Wed, 29 Jun 2011 08:49:39 +0200
Subject: [PATCH 207/600] egl/gbm: Fix EGL_DEFAULT_DISPLAY

---
 src/egl/drivers/dri2/egl_dri2.c               |  7 ++++++
 src/egl/drivers/dri2/egl_dri2.h               |  1 +
 src/egl/drivers/dri2/platform_drm.c           | 25 +++++++++++++++++--
 .../state_trackers/egl/drm/native_drm.c       | 23 +++++++++++++----
 .../state_trackers/egl/drm/native_drm.h       |  4 +++
 5 files changed, 53 insertions(+), 7 deletions(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 0aca929e6aa..9a37ea4bbfc 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -591,6 +591,13 @@ dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp)
       case _EGL_PLATFORM_WAYLAND:
          wl_display_destroy(dri2_dpy->wl_dpy);
          break;
+#endif
+#ifdef HAVE_DRM_PLATFORM
+      case _EGL_PLATFORM_DRM:
+         if (dri2_dpy->own_gbm_device) {
+            gbm_device_destroy(&dri2_dpy->gbm_dri->base.base);
+         }
+         break;
 #endif
       default:
          break;
diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h
index 3854200bc69..a7297188af2 100644
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -86,6 +86,7 @@ struct dri2_egl_display
 
 #ifdef HAVE_DRM_PLATFORM
    struct gbm_dri_device    *gbm_dri;
+   int                       own_gbm_device;
 #endif
 
    char                     *device_name;
diff --git a/src/egl/drivers/dri2/platform_drm.c b/src/egl/drivers/dri2/platform_drm.c
index 579baf9f9d2..04b10e279ec 100644
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -30,6 +30,10 @@
 #include <string.h>
 #include <xf86drm.h>
 #include <dlfcn.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
 
 #include "egl_dri2.h"
 
@@ -90,6 +94,7 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
 {
    struct dri2_egl_display *dri2_dpy;
    struct gbm_device *gbm;
+   int fd = -1;
    int i;
 
    dri2_dpy = malloc(sizeof *dri2_dpy);
@@ -100,7 +105,15 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
 
    disp->DriverData = (void *) dri2_dpy;
 
-   gbm = (struct gbm_device *) disp->PlatformDisplay;
+   gbm = disp->PlatformDisplay;
+   if (gbm == NULL) {
+      fd = open("/dev/dri/card0", O_RDWR);
+      dri2_dpy->own_gbm_device = 1;
+      gbm = gbm_create_device(fd);
+      if (gbm == NULL)
+         return EGL_FALSE;
+   }
+
    if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0) {
       free(dri2_dpy);
       return EGL_FALSE;
@@ -112,7 +125,15 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
       return EGL_FALSE;
    }
 
-   dri2_dpy->fd = gbm_device_get_fd(gbm);
+   if (fd < 0) {
+      fd = dup(gbm_device_get_fd(gbm));
+      if (fd < 0) {
+         free(dri2_dpy);
+         return EGL_FALSE;
+      }
+   }
+
+   dri2_dpy->fd = fd;
    dri2_dpy->device_name = dri2_get_device_name_for_fd(dri2_dpy->fd);
    dri2_dpy->driver_name = dri2_dpy->gbm_dri->base.driver_name;
 
diff --git a/src/gallium/state_trackers/egl/drm/native_drm.c b/src/gallium/state_trackers/egl/drm/native_drm.c
index 47910de8d3c..c013769e57d 100644
--- a/src/gallium/state_trackers/egl/drm/native_drm.c
+++ b/src/gallium/state_trackers/egl/drm/native_drm.c
@@ -134,8 +134,11 @@ drm_display_destroy(struct native_display *ndpy)
    if (drmdpy->device_name)
       FREE(drmdpy->device_name);
 
-   if (drmdpy->fd >= 0)
-      close(drmdpy->fd);
+   if (drmdpy->own_gbm) {
+      gbm_device_destroy(&drmdpy->gbmdrm->base.base);
+      if (drmdpy->fd >= 0)
+         close(drmdpy->fd);
+   }
 
    FREE(drmdpy);
 }
@@ -258,7 +261,7 @@ drm_display_init_screen(struct native_display *ndpy)
 }
 
 static struct native_display *
-drm_create_display(struct gbm_gallium_drm_device *gbmdrm,
+drm_create_display(struct gbm_gallium_drm_device *gbmdrm, int own_gbm,
                    const struct native_event_handler *event_handler)
 {
    struct drm_display *drmdpy;
@@ -267,6 +270,8 @@ drm_create_display(struct gbm_gallium_drm_device *gbmdrm,
    if (!drmdpy)
       return NULL;
 
+   drmdpy->gbmdrm = gbmdrm;
+   drmdpy->own_gbm = own_gbm;
    drmdpy->fd = gbmdrm->base.base.fd;
    drmdpy->device_name = drm_get_device_name(drmdpy->fd);
 
@@ -302,22 +307,30 @@ native_create_display(void *dpy, boolean use_sw)
 {
    struct gbm_gallium_drm_device *gbm;
    int fd;
+   int own_gbm = 0;
 
    gbm = dpy;
 
    if (gbm == NULL) {
       fd = open("/dev/dri/card0", O_RDWR);
+      /* FIXME: Use an internal constructor to create a gbm
+       * device with gallium backend directly, without setenv */
+      setenv("GBM_BACKEND", "gbm_gallium_drm.so", 1);
       gbm = gbm_gallium_drm_device(gbm_create_device(fd));
+      own_gbm = 1;
    }
 
    if (gbm == NULL)
       return NULL;
    
    if (strcmp(gbm_device_get_backend_name(&gbm->base.base), "drm") != 0 ||
-       gbm->base.type != GBM_DRM_DRIVER_TYPE_GALLIUM)
+       gbm->base.type != GBM_DRM_DRIVER_TYPE_GALLIUM) {
+      if (own_gbm)
+         gbm_device_destroy(&gbm->base.base);
       return NULL;
+   }
 
-   return drm_create_display(gbm, drm_event_handler);
+   return drm_create_display(gbm, own_gbm, drm_event_handler);
 }
 
 static const struct native_platform drm_platform = {
diff --git a/src/gallium/state_trackers/egl/drm/native_drm.h b/src/gallium/state_trackers/egl/drm/native_drm.h
index 675a58a1922..18cebf4e276 100644
--- a/src/gallium/state_trackers/egl/drm/native_drm.h
+++ b/src/gallium/state_trackers/egl/drm/native_drm.h
@@ -41,6 +41,8 @@
 #include "common/native_wayland_drm_bufmgr_helper.h"
 #endif
 
+#include "gbm_gallium_drmint.h"
+
 struct drm_config;
 struct drm_crtc;
 struct drm_connector;
@@ -52,6 +54,8 @@ struct drm_display {
 
    const struct native_event_handler *event_handler;
 
+   struct gbm_gallium_drm_device *gbmdrm;
+   int own_gbm;
    int fd;
    char *device_name;
    struct drm_config *config;

From 57590e173b6f421b1015190aa3c0011ea55f31d8 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 28 Jul 2011 15:26:01 +0200
Subject: [PATCH 208/600] st/mesa: determine Const.MaxSamples in
 init_extensions

v2: Check for non-pow2 sample counts as well.
---
 src/mesa/state_tracker/st_extensions.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index b5f6d356eb0..8e900934054 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -228,6 +228,7 @@ void st_init_extensions(struct st_context *st)
 {
    struct pipe_screen *screen = st->pipe->screen;
    struct gl_context *ctx = st->ctx;
+   int i;
 
    /*
     * Extensions that are supported by all Gallium drivers:
@@ -605,6 +606,16 @@ void st_init_extensions(struct st_context *st)
       ctx->Extensions.EXT_packed_float = GL_TRUE;
    }
 
+   /* Maximum sample count. */
+   for (i = 16; i > 0; --i) {
+      if (screen->is_format_supported(screen, PIPE_FORMAT_B8G8R8A8_UNORM,
+                                      PIPE_TEXTURE_2D, i,
+                                      PIPE_BIND_RENDER_TARGET)) {
+         ctx->Const.MaxSamples = i;
+         break;
+      }
+   }
+
    if (screen->get_param(screen, PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE)) {
       ctx->Extensions.ARB_seamless_cube_map = GL_TRUE;
       ctx->Extensions.AMD_seamless_cubemap_per_texture = GL_TRUE;

From 94822c6d83b7811db2a02bb4416df02ae225ba47 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Wed, 3 Aug 2011 15:43:16 +0200
Subject: [PATCH 209/600] gallium: extend resource_resolve to accommodate
 BlitFramebuffer

Resolve via glBlitFramebuffer allows resolving a sub-region of a
renderbuffer to a different location in any mipmap level of some
other texture, and, with a new extension, even scaling. Therefore,
location and size parameters are needed.

The mask parameter was added because resolving only depth or only
stencil of a combined buffer is possible as well.

Full information about the blit operation allows the drivers to
take the most efficient path they possibly can.
---
 src/gallium/docs/source/context.rst  |  9 ++++++++-
 src/gallium/include/pipe/p_context.h |  8 +++-----
 src/gallium/include/pipe/p_defines.h |  4 ++++
 src/gallium/include/pipe/p_state.h   | 28 ++++++++++++++++++++++++++++
 4 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst
index 25a3245066c..3faf801b4b1 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -329,8 +329,15 @@ textured quad blitter.. The source and destination may be the same resource,
 but overlapping blits are not permitted.
 
 ``resource_resolve`` resolves a multisampled resource into a non-multisampled
-one. Formats and dimensions must match. This function must be present if a driver
+one. Their formats must match. This function must be present if a driver
 supports multisampling.
+The region that is to be resolved is described by ``pipe_resolve_info``, which
+provides a source and a destination rectangle.
+The source rectangle may be vertically flipped, but otherwise the dimensions
+of the rectangles must match, unless PIPE_CAP_SCALED_RESOLVE is supported,
+in which case scaling and horizontal flipping are allowed as well.
+The result of resolving depth/stencil values may be any function of the values at
+the sample points, but returning the value of the centermost sample is preferred.
 
 The interfaces to these calls are likely to change to make it easier
 for a driver to batch multiple blits with the same source and
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index 3f6d90d1bf4..da3ee87515f 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -49,6 +49,7 @@ struct pipe_index_buffer;
 struct pipe_query;
 struct pipe_poly_stipple;
 struct pipe_rasterizer_state;
+struct pipe_resolve_info;
 struct pipe_resource;
 struct pipe_sampler_state;
 struct pipe_sampler_view;
@@ -268,13 +269,10 @@ struct pipe_context {
 
    /**
     * Resolve a multisampled resource into a non-multisampled one.
-    * Source and destination must have the same size and same format.
+    * Source and destination must be of the same format.
     */
    void (*resource_resolve)(struct pipe_context *pipe,
-                            struct pipe_resource *dst,
-                            unsigned dst_layer,
-                            struct pipe_resource *src,
-                            unsigned src_layer);
+                            const struct pipe_resolve_info *info);
 
    /*@}*/
 
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 79b89699566..7ffdf97fdfb 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -99,6 +99,9 @@ enum pipe_error {
 #define PIPE_MASK_B  0x4
 #define PIPE_MASK_A  0x8
 #define PIPE_MASK_RGBA 0xf
+#define PIPE_MASK_Z  0x10
+#define PIPE_MASK_S  0x20
+#define PIPE_MASK_ZS 0x30
 
 
 /**
@@ -468,6 +471,7 @@ enum pipe_cap {
    PIPE_CAP_MIXED_COLORBUFFER_FORMATS = 46,
    PIPE_CAP_SEAMLESS_CUBE_MAP = 47,
    PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE = 48,
+   PIPE_CAP_SCALED_RESOLVE = 49
 };
 
 /* Shader caps not specific to any single stage */
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index d442c15c02a..840b3ee0e37 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -483,6 +483,34 @@ struct pipe_draw_info
 };
 
 
+/**
+ * Information to describe a resource_resolve call.
+ */
+struct pipe_resolve_info
+{
+   struct {
+      struct pipe_resource *res;
+      unsigned level;
+      unsigned layer;
+      int x0; /**< always left */
+      int y0; /**< always top */
+      int x1; /**< determines scale if PIPE_CAP_SCALED_RESOLVE is supported */
+      int y1; /**< determines scale if PIPE_CAP_SCALED_RESOLVE is supported */
+   } dst;
+
+   struct {
+      struct pipe_resource *res;
+      unsigned layer;
+      int x0;
+      int y0;
+      int x1; /**< may be < x0 only if PIPE_CAP_SCALED_RESOLVE is supported */
+      int y1; /**< may be < y1 even if PIPE_CAP_SCALED_RESOLVE not supported */
+   } src;
+
+   unsigned mask; /**< PIPE_MASK_RGBA, Z, S or ZS */
+};
+
+
 #ifdef __cplusplus
 }
 #endif

From f253d83bc72e7d26df8cd3a04747b3d46a8543e6 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Wed, 3 Aug 2011 16:01:41 +0200
Subject: [PATCH 210/600] st/mesa: implement multisample resolve via
 BlitFramebuffer

---
 src/mesa/state_tracker/st_cb_blit.c | 116 ++++++++++++++++++++++++++--
 1 file changed, 111 insertions(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c
index 416be194d11..276d10fb557 100644
--- a/src/mesa/state_tracker/st_cb_blit.c
+++ b/src/mesa/state_tracker/st_cb_blit.c
@@ -61,6 +61,81 @@ st_destroy_blit(struct st_context *st)
 
 #if FEATURE_EXT_framebuffer_blit
 
+static void
+st_BlitFramebuffer_resolve(struct gl_context *ctx,
+                           GLbitfield mask,
+                           struct pipe_resolve_info *info)
+{
+   const GLbitfield depthStencil = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
+
+   struct st_context *st = st_context(ctx);
+
+   struct st_renderbuffer *srcRb, *dstRb;
+
+   if (mask & GL_COLOR_BUFFER_BIT) {
+      srcRb = st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer);
+      dstRb = st_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]);
+
+      info->mask = PIPE_MASK_RGBA;
+
+      info->src.res = srcRb->texture;
+      info->src.layer = srcRb->surface->u.tex.first_layer;
+      info->dst.res = dstRb->texture;
+      info->dst.level = dstRb->surface->u.tex.level;
+      info->dst.layer = dstRb->surface->u.tex.first_layer;
+
+      st->pipe->resource_resolve(st->pipe, info);
+   }
+
+   if (mask & depthStencil) {
+      struct gl_renderbuffer_attachment *srcDepth, *srcStencil;
+      struct gl_renderbuffer_attachment *dstDepth, *dstStencil;
+
+      srcDepth = &ctx->ReadBuffer->Attachment[BUFFER_DEPTH];
+      dstDepth = &ctx->DrawBuffer->Attachment[BUFFER_DEPTH];
+      srcStencil = &ctx->ReadBuffer->Attachment[BUFFER_STENCIL];
+      dstStencil = &ctx->DrawBuffer->Attachment[BUFFER_STENCIL];
+
+      const boolean combined =
+         st_is_depth_stencil_combined(srcDepth, srcStencil) &&
+         st_is_depth_stencil_combined(dstDepth, dstStencil);
+
+      if ((mask & GL_DEPTH_BUFFER_BIT) || combined) {
+         /* resolve depth and, if combined and requested, stencil as well */
+         srcRb = st_renderbuffer(srcDepth->Renderbuffer);
+         dstRb = st_renderbuffer(dstDepth->Renderbuffer);
+
+         info->mask = (mask & GL_DEPTH_BUFFER_BIT) ? PIPE_MASK_Z : 0;
+         if (combined && (mask & GL_STENCIL_BUFFER_BIT))
+            info->mask |= PIPE_MASK_S;
+
+         info->src.res = srcRb->texture;
+         info->src.layer = srcRb->surface->u.tex.first_layer;
+         info->dst.res = dstRb->texture;
+         info->dst.level = dstRb->surface->u.tex.level;
+         info->dst.layer = dstRb->surface->u.tex.first_layer;
+
+         st->pipe->resource_resolve(st->pipe, info);
+      }
+
+      if (mask & GL_STENCIL_BUFFER_BIT) {
+         /* resolve separate stencil buffer */
+         srcRb = st_renderbuffer(srcStencil->Renderbuffer);
+         dstRb = st_renderbuffer(dstStencil->Renderbuffer);
+
+         info->mask = PIPE_MASK_S;
+
+         info->src.res = srcRb->texture;
+         info->src.layer = srcRb->surface->u.tex.first_layer;
+         info->dst.res = dstRb->texture;
+         info->dst.level = dstRb->surface->u.tex.level;
+         info->dst.layer = dstRb->surface->u.tex.first_layer;
+
+         st->pipe->resource_resolve(st->pipe, info);
+      }
+   }
+}
+
 static void
 st_BlitFramebuffer(struct gl_context *ctx,
                    GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
@@ -95,6 +170,42 @@ st_BlitFramebuffer(struct gl_context *ctx,
       srcY1 = readFB->Height - srcY1;
    }
 
+   /* Disable conditional rendering. */
+   if (st->render_condition) {
+      st->pipe->render_condition(st->pipe, NULL, 0);
+   }
+
+   if (readFB->Visual.sampleBuffers > drawFB->Visual.sampleBuffers) {
+      struct pipe_resolve_info info;
+
+      if (dstX0 < dstX1) {
+         info.dst.x0 = dstX0;
+         info.dst.x1 = dstX1;
+         info.src.x0 = srcX0;
+         info.src.x1 = srcX1;
+      } else {
+         info.dst.x0 = dstX1;
+         info.dst.x1 = dstX0;
+         info.src.x0 = srcX1;
+         info.src.x1 = srcX0;
+      }
+      if (dstY0 < dstY1) {
+         info.dst.y0 = dstY0;
+         info.dst.y1 = dstY1;
+         info.src.y0 = srcY0;
+         info.src.y1 = srcY1;
+      } else {
+         info.dst.y0 = dstY1;
+         info.dst.y1 = dstY0;
+         info.src.y0 = srcY1;
+         info.src.y1 = srcY0;
+      }
+
+      st_BlitFramebuffer_resolve(ctx, mask, &info); /* filter doesn't apply */
+
+      goto done;
+   }
+
    if (srcY0 > srcY1 && dstY0 > dstY1) {
       /* Both src and dst are upside down.  Swap Y to make it
        * right-side up to increase odds of using a fast path.
@@ -109,11 +220,6 @@ st_BlitFramebuffer(struct gl_context *ctx,
       dstY1 = tmp;
    }
 
-   /* Disable conditional rendering. */
-   if (st->render_condition) {
-      st->pipe->render_condition(st->pipe, NULL, 0);
-   }
-
    if (mask & GL_COLOR_BUFFER_BIT) {
       struct gl_renderbuffer_attachment *srcAtt =
          &readFB->Attachment[readFB->_ColorReadBufferIndex];

From e9d84dab8817a0a7e463229b9a2820b00a9ce667 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 28 Jul 2011 15:54:53 +0200
Subject: [PATCH 211/600] nv50: implement resource_resolve with custom blit

---
 src/gallium/drivers/nv50/nv50_context.h       |   3 +-
 src/gallium/drivers/nv50/nv50_formats.c       |   4 +-
 src/gallium/drivers/nv50/nv50_screen.c        |   4 +
 src/gallium/drivers/nv50/nv50_screen.h        |   6 +
 src/gallium/drivers/nv50/nv50_shader_state.c  |  11 +-
 .../drivers/nv50/nv50_state_validate.c        |   4 +-
 src/gallium/drivers/nv50/nv50_surface.c       | 536 +++++++++++++++++-
 src/gallium/drivers/nv50/nv50_vbo.c           |   2 +-
 8 files changed, 559 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index c1226d5eb26..284db69e312 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -171,7 +171,8 @@ void nv50_validate_derived_rs(struct nv50_context *);
 extern void nv50_init_state_functions(struct nv50_context *);
 
 /* nv50_state_validate.c */
-extern boolean nv50_state_validate(struct nv50_context *);
+/* @words: check for space before emitting relocs */
+extern boolean nv50_state_validate(struct nv50_context *, unsigned words);
 
 /* nv50_surface.c */
 extern void nv50_clear(struct pipe_context *, unsigned buffers,
diff --git a/src/gallium/drivers/nv50/nv50_formats.c b/src/gallium/drivers/nv50/nv50_formats.c
index be43147468a..34502d0a397 100644
--- a/src/gallium/drivers/nv50/nv50_formats.c
+++ b/src/gallium/drivers/nv50/nv50_formats.c
@@ -116,7 +116,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
     SAMPLER_VIEW | DEPTH_STENCIL },
 
    [PIPE_FORMAT_Z24_UNORM_S8_USCALED] = { NV50_ZETA_FORMAT_Z24_S8_UNORM,
-    B_(C0, C0, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, Z24_S8, 0),
+    B_(C0, C1, C0, ONE_FLOAT, UNORM, UINT, UINT, UINT, Z24_S8, 0),
     SAMPLER_VIEW | DEPTH_STENCIL },
 
    [PIPE_FORMAT_Z24X8_UNORM] = { NV50_ZETA_FORMAT_Z24_X8_UNORM,
@@ -124,7 +124,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
     SAMPLER_VIEW | DEPTH_STENCIL },
 
    [PIPE_FORMAT_S8_USCALED_Z24_UNORM] = { NV50_ZETA_FORMAT_S8_Z24_UNORM,
-    B_(C1, C1, C1, ONE_FLOAT, UINT, UNORM, UINT, UINT, S8_Z24, 0),
+    B_(C1, C0, C1, ONE_FLOAT, UINT, UNORM, UINT, UINT, S8_Z24, 0),
     SAMPLER_VIEW | DEPTH_STENCIL },
 
    [PIPE_FORMAT_Z32_FLOAT] = { NV50_ZETA_FORMAT_Z32_FLOAT,
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 4139b85a9ae..07a74cc2b9f 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -91,6 +91,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_TEXTURE_SHADOW_MAP:
    case PIPE_CAP_NPOT_TEXTURES:
    case PIPE_CAP_ANISOTROPIC_FILTER:
+   case PIPE_CAP_SCALED_RESOLVE:
       return 1;
    case PIPE_CAP_SEAMLESS_CUBE_MAP:
       return nv50_screen(pscreen)->tesla->grclass >= NVA0_3D;
@@ -604,6 +605,9 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 
    screen->mm_VRAM_fe0 = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, 0xfe0);
 
+   if (!nv50_blitctx_create(screen))
+      goto fail;
+
    nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);
 
    return pscreen;
diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h
index 64ad209a728..315ca80c0d2 100644
--- a/src/gallium/drivers/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nv50/nv50_screen.h
@@ -21,6 +21,8 @@ struct nv50_context;
 
 #define NV50_SCREEN_RESIDENT_BO_COUNT 5
 
+struct nv50_blitctx;
+
 struct nv50_screen {
    struct nouveau_screen base;
    struct nouveau_winsys *nvws;
@@ -39,6 +41,8 @@ struct nv50_screen {
    struct nouveau_resource *gp_code_heap;
    struct nouveau_resource *fp_code_heap;
 
+   struct nv50_blitctx *blitctx;
+
    struct {
       void **entries;
       int next;
@@ -71,6 +75,8 @@ nv50_screen(struct pipe_screen *screen)
    return (struct nv50_screen *)screen;
 }
 
+boolean nv50_blitctx_create(struct nv50_screen *);
+
 void nv50_screen_make_buffers_resident(struct nv50_screen *);
 
 int nv50_screen_tic_alloc(struct nv50_screen *, void *);
diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c
index e5b10c37bef..d73f7c7f213 100644
--- a/src/gallium/drivers/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nv50/nv50_shader_state.c
@@ -130,13 +130,14 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog)
    int ret;
    unsigned size;
 
-   if (prog->translated)
+   if (!prog->translated) {
+      prog->translated = nv50_program_translate(prog);
+      if (!prog->translated)
+         return FALSE;
+   } else
+   if (prog->res)
       return TRUE;
 
-   prog->translated = nv50_program_translate(prog);
-   if (!prog->translated)
-      return FALSE;
-
    if (prog->type == PIPE_SHADER_FRAGMENT) heap = nv50->screen->fp_code_heap;
    else
    if (prog->type == PIPE_SHADER_GEOMETRY) heap = nv50->screen->gp_code_heap;
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 8b0b08f8e93..44f2d25c1a7 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -350,7 +350,7 @@ static struct state_validate {
 #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
 
 boolean
-nv50_state_validate(struct nv50_context *nv50)
+nv50_state_validate(struct nv50_context *nv50, unsigned words)
 {
    unsigned i;
 
@@ -367,6 +367,8 @@ nv50_state_validate(struct nv50_context *nv50)
       nv50->dirty = 0;
    }
 
+   MARK_RING(nv50->screen->base.channel, words, 0);
+
    nv50_bufctx_emit_relocs(nv50);
 
    return TRUE;
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index eefbaad6483..1a5077e970b 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -368,7 +368,7 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
 
    /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
    nv50->dirty &= NV50_NEW_FRAMEBUFFER;
-   if (!nv50_state_validate(nv50))
+   if (!nv50_state_validate(nv50, 9 + (fb->nr_cbufs * 2)))
       return;
 
    if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
@@ -405,12 +405,546 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
    nv50->dirty = dirty & ~NV50_NEW_FRAMEBUFFER;
 }
 
+
+struct nv50_blitctx
+{
+   struct nv50_screen *screen;
+   struct {
+      struct pipe_framebuffer_state fb;
+      struct nv50_program *vp;
+      struct nv50_program *gp;
+      struct nv50_program *fp;
+      unsigned num_textures[3];
+      unsigned num_samplers[3];
+      struct pipe_sampler_view *texture;
+      struct nv50_tsc_entry *sampler;
+      unsigned dirty;
+      unsigned clip_nr;
+   } saved;
+   struct nv50_program vp;
+   struct nv50_program fp;
+   struct nv50_tsc_entry sampler[2]; /* nearest, bilinear */
+   uint32_t fp_offset;
+   uint16_t color_mask;
+   uint8_t filter;
+};
+
+static void
+nv50_blitctx_make_vp(struct nv50_blitctx *blit)
+{
+   static const uint32_t code[] =
+   {
+      0x10000001, /* mov b32 o[0x00] s[0x00] */ /* HPOS.x */
+      0x0423c788,
+      0x10000205, /* mov b32 o[0x04] s[0x04] */ /* HPOS.y */
+      0x0423c788,
+      0x10000409, /* mov b32 o[0x08] s[0x08] */ /* TEXC.x */
+      0x0423c788,
+      0x1000060d, /* mov b32 o[0x0c] s[0x0c] */ /* TEXC.y */
+      0x0423c788,
+      0x10000811, /* exit mov b32 o[0x10] s[0x10] */ /* TEXC.z */
+      0x0423c789,
+   };
+
+   blit->vp.type = PIPE_SHADER_VERTEX;
+   blit->vp.translated = TRUE;
+   blit->vp.code = (uint32_t *)code; /* const_cast */
+   blit->vp.code_size = sizeof(code);
+   blit->vp.max_gpr = 4;
+   blit->vp.max_out = 5;
+   blit->vp.out_nr = 2;
+   blit->vp.out[0].mask = 0x3;
+   blit->vp.out[0].sn = TGSI_SEMANTIC_POSITION;
+   blit->vp.out[1].hw = 2;
+   blit->vp.out[1].mask = 0x7;
+   blit->vp.out[1].sn = TGSI_SEMANTIC_GENERIC;
+   blit->vp.vp.attrs[0] = 0x73;
+   blit->vp.vp.psiz = 0x40;
+   blit->vp.vp.edgeflag = 0x40;
+}
+
+static void
+nv50_blitctx_make_fp(struct nv50_blitctx *blit)
+{
+   static const uint32_t code[] =
+   {
+      /* 3 coords RGBA in, RGBA out, also for Z32_FLOAT(_S8X24_USCALED) */
+      0x80000000, /* interp $r0 v[0x0] */
+      0x80010004, /* interp $r1 v[0x4] */
+      0x80020009, /* interp $r2 flat v[0x8] */
+      0x00040780,
+      0xf6800001, /* texauto live { $r0,1,2,3 } $t0 $s0 { $r0,1,2 } */
+      0x0000c785, /* exit */
+
+      /* 3 coords ZS in, S encoded in R, Z encoded in GBA (8_UNORM) */
+      0x80000000, /* interp $r0 v[0x00] */
+      0x80010004, /* interp $r1 v[0x04] */
+      0x80020009, /* interp $r2 flat v[0x8] */
+      0x00040780,
+      0xf6800001, /* texauto live { $r0,1,#,# } $t0 $s0 { $r0,1,2 } */
+      0x00000784,
+      0xc03f0009, /* mul f32 $r2 $r0 (2^24 - 1) */
+      0x04b7ffff,
+      0xa0000201, /* cvt f32 $r0 s32 $r1 */
+      0x44014780,
+      0xa0000409, /* cvt rni s32 $r2 f32 $r2 */
+      0x8c004780,
+      0xc0010001, /* mul f32 $r0 $r0 1/0xff */
+      0x03b8080b,
+      0xd03f0405, /* and b32 $r1 $r2 0x0000ff */
+      0x0000000f,
+      0xd000040d, /* and b32 $r3 $r2 0xff0000 */
+      0x000ff003,
+      0xd0000409, /* and b32 $r2 $r2 0x00ff00 */
+      0x00000ff3,
+      0xa0000205, /* cvt f32 $r1 s32 $r1 */
+      0x44014780,
+      0xa000060d, /* cvt f32 $r3 s32 $r3 */
+      0x44014780,
+      0xa0000409, /* cvt f32 $r2 s32 $r2 */
+      0x44014780,
+      0xc0010205, /* mul f32 $r1 $r1 1/0x0000ff */
+      0x03b8080b,
+      0xc001060d, /* mul f32 $r3 $r3 1/0x00ff00 */
+      0x0338080b,
+      0xc0010409, /* mul f32 $r2 $r2 1/0xff0000 */
+      0x0378080b,
+      0xf0000001, /* exit never nop */
+      0xe0000001,
+
+      /* 3 coords ZS in, Z encoded in RGB, S encoded in A (U8_UNORM) */
+      0x80000000, /* interp $r0 v[0x00] */
+      0x80010004, /* interp $r1 v[0x04] */
+      0x80020009, /* interp $r2 flat v[0x8] */
+      0x00040780,
+      0xf6800001, /* texauto live { $r0,1,#,# } $t0 $s0 { $r0,1,2 } */
+      0x00000784,
+      0xc03f0009, /* mul f32 $r2 $r0 (2^24 - 1) */
+      0x04b7ffff,
+      0xa0000281, /* cvt f32 $r3 s32 $r1 */
+      0x44014780,
+      0xa0000409, /* cvt rni s32 $r2 f32 $r2 */
+      0x8c004780,
+      0xc001060d, /* mul f32 $r3 $r3 1/0xff */
+      0x03b8080b,
+      0xd03f0401, /* and b32 $r0 $r2 0x0000ff */
+      0x0000000f,
+      0xd0000405, /* and b32 $r1 $r2 0x00ff00 */
+      0x00000ff3,
+      0xd0000409, /* and b32 $r2 $r2 0xff0000 */
+      0x000ff003,
+      0xa0000001, /* cvt f32 $r0 s32 $r0 */
+      0x44014780,
+      0xa0000205, /* cvt f32 $r1 s32 $r1 */
+      0x44014780,
+      0xa0000409, /* cvt f32 $r2 s32 $r2 */
+      0x44014780,
+      0xc0010001, /* mul f32 $r0 $r0 1/0x0000ff */
+      0x03b8080b,
+      0xc0010205, /* mul f32 $r1 $r1 1/0x00ff00 */
+      0x0378080b,
+      0xc0010409, /* mul f32 $r2 $r2 1/0xff0000 */
+      0x0338080b,
+      0xf0000001, /* exit never nop */
+      0xe0000001
+   };
+
+   blit->fp.type = PIPE_SHADER_FRAGMENT;
+   blit->fp.translated = TRUE;
+   blit->fp.code = (uint32_t *)code; /* const_cast */
+   blit->fp.code_size = sizeof(code);
+   blit->fp.max_gpr = 4;
+   blit->fp.max_out = 4;
+   blit->fp.in_nr = 1;
+   blit->fp.in[0].mask = 0x7; /* last component flat */
+   blit->fp.in[0].linear = 1;
+   blit->fp.in[0].sn = TGSI_SEMANTIC_GENERIC;
+   blit->fp.out_nr = 1;
+   blit->fp.out[0].mask = 0xf;
+   blit->fp.out[0].sn = TGSI_SEMANTIC_COLOR;
+   blit->fp.fp.interp = 0x00020403;
+   blit->fp.gp.primid = 0x80;
+}
+
+static void
+nv50_blitctx_make_sampler(struct nv50_blitctx *blit)
+{
+   /* clamp to edge, min/max lod = 0, nearest filtering */
+
+   blit->sampler[0].id = -1;
+
+   blit->sampler[0].tsc[0] = 0x00000092;
+   blit->sampler[0].tsc[1] = 0x00000051;
+
+   /* clamp to edge, min/max lod = 0, bilinear filtering */
+
+   blit->sampler[1].id = -1;
+
+   blit->sampler[1].tsc[0] = 0x00000092;
+   blit->sampler[1].tsc[1] = 0x00000062;
+}
+
+/* Since shaders cannot export stencil, we cannot copy stencil values when
+ * rendering to ZETA, so we attach the ZS surface to a colour render target.
+ */
+static INLINE enum pipe_format
+nv50_blit_zeta_to_colour_format(enum pipe_format format)
+{
+   switch (format) {
+   case PIPE_FORMAT_Z16_UNORM:               return PIPE_FORMAT_R16_UNORM;
+   case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+   case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+   case PIPE_FORMAT_Z24X8_UNORM:             return PIPE_FORMAT_R8G8B8A8_UNORM;
+   case PIPE_FORMAT_Z32_FLOAT:               return PIPE_FORMAT_R32_FLOAT;
+   case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED: return PIPE_FORMAT_R32G32_FLOAT;
+   default:
+      assert(0);
+      return PIPE_FORMAT_NONE;
+   }
+}
+
+static void
+nv50_blitctx_get_color_mask_and_fp(struct nv50_blitctx *blit,
+                                   enum pipe_format format, uint8_t mask)
+{
+   blit->color_mask = 0;
+
+   switch (format) {
+   case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+      blit->fp_offset = 160;
+      if (mask & PIPE_MASK_Z)
+         blit->color_mask |= 0x0111;
+      if (mask & PIPE_MASK_S)
+         blit->color_mask |= 0x1000;
+      break;
+   case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+      blit->fp_offset = 24;
+      if (mask & PIPE_MASK_Z)
+         blit->color_mask |= 0x1110;
+      if (mask & PIPE_MASK_S)
+         blit->color_mask |= 0x0001;
+      break;
+   default:
+      blit->fp_offset = 0;
+      if (mask & (PIPE_MASK_R | PIPE_MASK_Z)) blit->color_mask |= 0x0001;
+      if (mask & (PIPE_MASK_G | PIPE_MASK_S)) blit->color_mask |= 0x0010;
+      if (mask & PIPE_MASK_B) blit->color_mask |= 0x0100;
+      if (mask & PIPE_MASK_A) blit->color_mask |= 0x1000;
+      break;
+   }
+}
+
+static void
+nv50_blit_set_dst(struct nv50_context *nv50,
+                  struct pipe_resource *res, unsigned level, unsigned layer)
+{
+   struct pipe_context *pipe = &nv50->base.pipe;
+   struct pipe_surface templ;
+
+   if (util_format_is_depth_or_stencil(res->format))
+      templ.format = nv50_blit_zeta_to_colour_format(res->format);
+   else
+      templ.format = res->format;
+
+   templ.usage = PIPE_USAGE_STREAM;
+   templ.u.tex.level = level;
+   templ.u.tex.first_layer = templ.u.tex.last_layer = layer;
+
+   nv50->framebuffer.cbufs[0] = nv50_miptree_surface_new(pipe, res, &templ);
+   nv50->framebuffer.nr_cbufs = 1;
+   nv50->framebuffer.zsbuf = NULL;
+   nv50->framebuffer.width = nv50->framebuffer.cbufs[0]->width;
+   nv50->framebuffer.height = nv50->framebuffer.cbufs[0]->height;
+}
+
+static INLINE void
+nv50_blit_fixup_tic_entry(struct pipe_sampler_view *view)
+{
+   struct nv50_tic_entry *ent = nv50_tic_entry(view);
+
+   ent->tic[2] &= ~(1 << 31); /* scaled coordinates, ok with 3d textures ? */
+
+   /* magic: */
+
+   ent->tic[3] = 0x20000000; /* affects quality of near vertical edges in MS8 */
+}
+
+static void
+nv50_blit_set_src(struct nv50_context *nv50,
+                  struct pipe_resource *res, unsigned level, unsigned layer)
+{
+   struct pipe_context *pipe = &nv50->base.pipe;
+   struct pipe_sampler_view templ;
+
+   templ.format = res->format;
+   templ.u.tex.first_layer = templ.u.tex.last_layer = layer;
+   templ.u.tex.first_level = templ.u.tex.last_level = level;
+   templ.swizzle_r = PIPE_SWIZZLE_RED;
+   templ.swizzle_g = PIPE_SWIZZLE_GREEN;
+   templ.swizzle_b = PIPE_SWIZZLE_BLUE;
+   templ.swizzle_a = PIPE_SWIZZLE_ALPHA;
+
+   nv50->textures[2][0] = nv50_create_sampler_view(pipe, res, &templ);
+
+   nv50_blit_fixup_tic_entry(nv50->textures[2][0]);
+
+   nv50->num_textures[0] = nv50->num_textures[1] = 0;
+   nv50->num_textures[2] = 1;
+}
+
+static void
+nv50_blitctx_prepare_state(struct nv50_blitctx *blit)
+{
+   struct nouveau_channel *chan = blit->screen->base.channel;
+
+   /* blend state */
+   BEGIN_RING(chan, RING_3D(COLOR_MASK(0)), 1);
+   OUT_RING  (chan, blit->color_mask);
+   BEGIN_RING(chan, RING_3D(BLEND_ENABLE(0)), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(LOGIC_OP_ENABLE), 1);
+   OUT_RING  (chan, 0);
+
+   /* rasterizer state */
+#ifndef NV50_SCISSORS_CLIPPING
+   BEGIN_RING(chan, RING_3D(SCISSOR_ENABLE(0)), 1);
+   OUT_RING  (chan, 1);
+#endif
+   BEGIN_RING(chan, RING_3D(VERTEX_TWO_SIDE_ENABLE), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(FRAG_COLOR_CLAMP_EN), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(MULTISAMPLE_ENABLE), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(MSAA_MASK(0)), 4);
+   OUT_RING  (chan, 0xffff);
+   OUT_RING  (chan, 0xffff);
+   OUT_RING  (chan, 0xffff);
+   OUT_RING  (chan, 0xffff);
+   BEGIN_RING(chan, RING_3D(POLYGON_MODE_FRONT), 3);
+   OUT_RING  (chan, NV50_3D_POLYGON_MODE_FRONT_FILL);
+   OUT_RING  (chan, NV50_3D_POLYGON_MODE_BACK_FILL);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(CULL_FACE_ENABLE), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(POLYGON_STIPPLE_ENABLE), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(POLYGON_OFFSET_FILL_ENABLE), 1);
+   OUT_RING  (chan, 0);
+
+   /* zsa state */
+   BEGIN_RING(chan, RING_3D(DEPTH_TEST_ENABLE), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(STENCIL_ENABLE), 1);
+   OUT_RING  (chan, 0);
+   BEGIN_RING(chan, RING_3D(ALPHA_TEST_ENABLE), 1);
+   OUT_RING  (chan, 0);
+}
+
+static void
+nv50_blitctx_pre_blit(struct nv50_blitctx *blit, struct nv50_context *nv50)
+{
+   int s;
+
+   blit->saved.fb.width = nv50->framebuffer.width;
+   blit->saved.fb.height = nv50->framebuffer.height;
+   blit->saved.fb.nr_cbufs = nv50->framebuffer.nr_cbufs;
+   blit->saved.fb.cbufs[0] = nv50->framebuffer.cbufs[0];
+   blit->saved.fb.zsbuf = nv50->framebuffer.zsbuf;
+
+   blit->saved.vp = nv50->vertprog;
+   blit->saved.gp = nv50->gmtyprog;
+   blit->saved.fp = nv50->fragprog;
+
+   nv50->vertprog = &blit->vp;
+   nv50->gmtyprog = NULL;
+   nv50->fragprog = &blit->fp;
+
+   blit->saved.clip_nr = nv50->clip.nr;
+
+   nv50->clip.nr = 0;
+
+   for (s = 0; s < 3; ++s) {
+      blit->saved.num_textures[s] = nv50->num_textures[s];
+      blit->saved.num_samplers[s] = nv50->num_samplers[s];
+   }
+   blit->saved.texture = nv50->textures[2][0];
+   blit->saved.sampler = nv50->samplers[2][0];
+
+   nv50->samplers[2][0] = &blit->sampler[blit->filter];
+
+   nv50->num_samplers[0] = nv50->num_samplers[1] = 0;
+   nv50->num_samplers[2] = 1;
+
+   blit->saved.dirty = nv50->dirty;
+
+   nv50->dirty =
+      NV50_NEW_FRAMEBUFFER |
+      NV50_NEW_VERTPROG | NV50_NEW_FRAGPROG | NV50_NEW_GMTYPROG |
+      NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS;
+}
+
+static void
+nv50_blitctx_post_blit(struct nv50_context *nv50, struct nv50_blitctx *blit)
+{
+   int s;
+
+   pipe_surface_reference(&nv50->framebuffer.cbufs[0], NULL);
+
+   nv50->framebuffer.width = blit->saved.fb.width;
+   nv50->framebuffer.height = blit->saved.fb.height;
+   nv50->framebuffer.nr_cbufs = blit->saved.fb.nr_cbufs;
+   nv50->framebuffer.cbufs[0] = blit->saved.fb.cbufs[0];
+   nv50->framebuffer.zsbuf = blit->saved.fb.zsbuf;
+
+   nv50->vertprog = blit->saved.vp;
+   nv50->gmtyprog = blit->saved.gp;
+   nv50->fragprog = blit->saved.fp;
+
+   nv50->clip.nr = blit->saved.clip_nr;
+
+   pipe_sampler_view_reference(&nv50->textures[2][0], NULL);
+
+   for (s = 0; s < 3; ++s) {
+      nv50->num_textures[s] = blit->saved.num_textures[s];
+      nv50->num_samplers[s] = blit->saved.num_samplers[s];
+   }
+   nv50->textures[2][0] = blit->saved.texture;
+   nv50->samplers[2][0] = blit->saved.sampler;
+
+   nv50->dirty = blit->saved.dirty |
+      (NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR | NV50_NEW_SAMPLE_MASK |
+       NV50_NEW_RASTERIZER | NV50_NEW_ZSA | NV50_NEW_BLEND |
+       NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS |
+       NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG | NV50_NEW_FRAGPROG);
+}
+
+static void
+nv50_resource_resolve(struct pipe_context *pipe,
+                      const struct pipe_resolve_info *info)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct nv50_screen *screen = nv50->screen;
+   struct nv50_blitctx *blit = screen->blitctx;
+   struct nouveau_channel *chan = screen->base.channel;
+   struct pipe_resource *src = info->src.res;
+   struct pipe_resource *dst = info->dst.res;
+   float x0, x1, y0, y1, z;
+   float x_range, y_range;
+
+   nv50_blitctx_get_color_mask_and_fp(blit, dst->format, info->mask);
+
+   blit->filter = util_format_is_depth_or_stencil(dst->format) ? 0 : 1;
+
+   nv50_blitctx_pre_blit(blit, nv50);
+
+   nv50_blit_set_dst(nv50, dst, info->dst.level, info->dst.layer);
+   nv50_blit_set_src(nv50, src, 0,               info->src.layer);
+
+   nv50_blitctx_prepare_state(blit);
+
+   nv50_state_validate(nv50, 36);
+
+   x_range =
+      (float)(info->src.x1 - info->src.x0) /
+      (float)(info->dst.x1 - info->dst.x0);
+   y_range =
+      (float)(info->src.y1 - info->src.y0) /
+      (float)(info->dst.y1 - info->dst.y0);
+
+   x0 = (float)info->src.x0 - x_range * (float)info->dst.x0;
+   y0 = (float)info->src.y0 - y_range * (float)info->dst.y0;
+
+   x1 = x0 + 16384.0f * x_range;
+   y1 = y0 + 16384.0f * y_range;
+
+   x0 *= (float)(1 << nv50_miptree(src)->ms_x);
+   x1 *= (float)(1 << nv50_miptree(src)->ms_x);
+   y0 *= (float)(1 << nv50_miptree(src)->ms_y);
+   y1 *= (float)(1 << nv50_miptree(src)->ms_y);
+
+   z = (float)info->src.layer;
+
+   BEGIN_RING(chan, RING_3D(FP_START_ID), 1);
+   OUT_RING  (chan,
+              blit->fp.code_base + blit->fp_offset);
+
+   BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1);
+   OUT_RING  (chan, 0);
+
+   /* Draw a large triangle in screen coordinates covering the whole
+    * render target, with scissors defining the destination region.
+    * The vertex is supplied with non-normalized texture coordinates
+    * arranged in a way to yield the desired offset and scale.
+    */
+
+   BEGIN_RING(chan, RING_3D(SCISSOR_HORIZ(0)), 2);
+   OUT_RING  (chan, (info->dst.x1 << 16) | info->dst.x0);
+   OUT_RING  (chan, (info->dst.y1 << 16) | info->dst.y0);
+
+   BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
+   OUT_RING  (chan, NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES);
+   BEGIN_RING(chan, RING_3D(VTX_ATTR_3F_X(1)), 3);
+   OUT_RINGf (chan, x0);
+   OUT_RINGf (chan, y0);
+   OUT_RINGf (chan, z);
+   BEGIN_RING(chan, RING_3D(VTX_ATTR_2F_X(0)), 2);
+   OUT_RINGf (chan, 0.0f);
+   OUT_RINGf (chan, 0.0f);
+   BEGIN_RING(chan, RING_3D(VTX_ATTR_3F_X(1)), 3);
+   OUT_RINGf (chan, x1);
+   OUT_RINGf (chan, y0);
+   OUT_RINGf (chan, z);
+   BEGIN_RING(chan, RING_3D(VTX_ATTR_2F_X(0)), 2);
+   OUT_RINGf (chan, 16384 << nv50_miptree(dst)->ms_x);
+   OUT_RINGf (chan, 0.0f);
+   BEGIN_RING(chan, RING_3D(VTX_ATTR_3F_X(1)), 3);
+   OUT_RINGf (chan, x0);
+   OUT_RINGf (chan, y1);
+   OUT_RINGf (chan, z);
+   BEGIN_RING(chan, RING_3D(VTX_ATTR_2F_X(0)), 2);
+   OUT_RINGf (chan, 0.0f);
+   OUT_RINGf (chan, 16384 << nv50_miptree(dst)->ms_y);
+   BEGIN_RING(chan, RING_3D(VERTEX_END_GL), 1);
+   OUT_RING  (chan, 0);
+
+   /* re-enable normally constant state */
+
+   BEGIN_RING(chan, RING_3D(VIEWPORT_TRANSFORM_EN), 1);
+   OUT_RING  (chan, 1);
+
+   nv50_blitctx_post_blit(nv50, blit);
+}
+
+boolean
+nv50_blitctx_create(struct nv50_screen *screen)
+{
+   screen->blitctx = CALLOC_STRUCT(nv50_blitctx);
+   if (!screen->blitctx) {
+      NOUVEAU_ERR("failed to allocate blit context\n");
+      return FALSE;
+   }
+
+   screen->blitctx->screen = screen;
+
+   nv50_blitctx_make_vp(screen->blitctx);
+   nv50_blitctx_make_fp(screen->blitctx);
+
+   nv50_blitctx_make_sampler(screen->blitctx);
+
+   screen->blitctx->color_mask = 0x1111;
+
+   return TRUE;
+}
+
 void
 nv50_init_surface_functions(struct nv50_context *nv50)
 {
    struct pipe_context *pipe = &nv50->base.pipe;
 
    pipe->resource_copy_region = nv50_resource_copy_region;
+   pipe->resource_resolve = nv50_resource_resolve;
    pipe->clear_render_target = nv50_clear_render_target;
    pipe->clear_depth_stencil = nv50_clear_depth_stencil;
 }
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index f23008ae4cf..1c8347a793a 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -647,7 +647,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_VERTEX | NV50_NEW_ARRAYS)))
       nv50_update_user_vbufs(nv50);
 
-   nv50_state_validate(nv50);
+   nv50_state_validate(nv50, 8); /* 8 as minimum, we use flush_notify here */
 
    chan->flush_notify = nv50_draw_vbo_flush_notify;
 

From 88a4f2fe543d7c394c0ad732ae60f8cf94c0d357 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 08:22:30 -0600
Subject: [PATCH 212/600] mesa: make error handling in glGetTexParameter() a
 bit more concise

---
 src/mesa/main/texparam.c | 270 ++++++++++++++-------------------------
 1 file changed, 98 insertions(+), 172 deletions(-)

diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index 134f15346e8..78dcc5dccea 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -994,28 +994,21 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level,
             *params = 0;
          break;
       case GL_TEXTURE_DEPTH_SIZE_ARB:
-         if (ctx->Extensions.ARB_depth_texture)
-            *params = _mesa_get_format_bits(texFormat, pname);
-         else
+         if (!ctx->Extensions.ARB_depth_texture)
             goto invalid_pname;
+         *params = _mesa_get_format_bits(texFormat, pname);
          break;
       case GL_TEXTURE_STENCIL_SIZE_EXT:
-         if (ctx->Extensions.EXT_packed_depth_stencil ||
-             ctx->Extensions.ARB_framebuffer_object) {
-            *params = _mesa_get_format_bits(texFormat, pname);
-         }
-         else {
+         if (!ctx->Extensions.EXT_packed_depth_stencil &&
+             !ctx->Extensions.ARB_framebuffer_object)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, pname);
          break;
       case GL_TEXTURE_SHARED_SIZE:
-         if (ctx->VersionMajor >= 3 ||
-             ctx->Extensions.EXT_texture_shared_exponent) {
-            *params = texFormat == MESA_FORMAT_RGB9_E5_FLOAT ? 5 : 0;
-         }
-         else {
+         if (ctx->VersionMajor < 3 &&
+             !ctx->Extensions.EXT_texture_shared_exponent)
             goto invalid_pname;
-         }
+         *params = texFormat == MESA_FORMAT_RGB9_E5_FLOAT ? 5 : 0;
          break;
 
       /* GL_ARB_texture_compression */
@@ -1036,67 +1029,46 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level,
 
       /* GL_ARB_texture_float */
       case GL_TEXTURE_RED_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_RED_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_RED_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_GREEN_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_GREEN_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_GREEN_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_BLUE_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_BLUE_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_BLUE_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_ALPHA_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_ALPHA_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_ALPHA_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_LUMINANCE_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_LUMINANCE_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_LUMINANCE_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_INTENSITY_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_INTENSITY_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_INTENSITY_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_DEPTH_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_DEPTH_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_DEPTH_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
 
       default:
@@ -1118,7 +1090,6 @@ void GLAPIENTRY
 _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
 {
    struct gl_texture_object *obj;
-   GLboolean error = GL_FALSE;
    GET_CURRENT_CONTEXT(ctx);
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
@@ -1187,49 +1158,37 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
          *params = (GLfloat) obj->MaxLevel;
          break;
       case GL_TEXTURE_MAX_ANISOTROPY_EXT:
-         if (ctx->Extensions.EXT_texture_filter_anisotropic) {
-            *params = obj->Sampler.MaxAnisotropy;
-         }
-	 else
-	    error = GL_TRUE;
+         if (!ctx->Extensions.EXT_texture_filter_anisotropic)
+            goto invalid_pname;
+         *params = obj->Sampler.MaxAnisotropy;
          break;
       case GL_TEXTURE_COMPARE_FAIL_VALUE_ARB:
-         if (ctx->Extensions.ARB_shadow_ambient) {
-            *params = obj->Sampler.CompareFailValue;
-         }
-	 else 
-	    error = GL_TRUE;
+         if (!ctx->Extensions.ARB_shadow_ambient)
+            goto invalid_pname;
+         *params = obj->Sampler.CompareFailValue;
          break;
       case GL_GENERATE_MIPMAP_SGIS:
 	 *params = (GLfloat) obj->GenerateMipmap;
          break;
       case GL_TEXTURE_COMPARE_MODE_ARB:
-         if (ctx->Extensions.ARB_shadow) {
-            *params = (GLfloat) obj->Sampler.CompareMode;
-         }
-	 else 
-	    error = GL_TRUE;
+         if (!ctx->Extensions.ARB_shadow)
+            goto invalid_pname;
+         *params = (GLfloat) obj->Sampler.CompareMode;
          break;
       case GL_TEXTURE_COMPARE_FUNC_ARB:
-         if (ctx->Extensions.ARB_shadow) {
-            *params = (GLfloat) obj->Sampler.CompareFunc;
-         }
-	 else 
-	    error = GL_TRUE;
+         if (!ctx->Extensions.ARB_shadow)
+            goto invalid_pname;
+         *params = (GLfloat) obj->Sampler.CompareFunc;
          break;
       case GL_DEPTH_TEXTURE_MODE_ARB:
-         if (ctx->Extensions.ARB_depth_texture) {
-            *params = (GLfloat) obj->Sampler.DepthMode;
-         }
-	 else 
-	    error = GL_TRUE;
+         if (!ctx->Extensions.ARB_depth_texture)
+            goto invalid_pname;
+         *params = (GLfloat) obj->Sampler.DepthMode;
          break;
       case GL_TEXTURE_LOD_BIAS:
-         if (ctx->Extensions.EXT_texture_lod_bias) {
-            *params = obj->Sampler.LodBias;
-         }
-	 else 
-	    error = GL_TRUE;
+         if (!ctx->Extensions.EXT_texture_lod_bias)
+            goto invalid_pname;
+         *params = obj->Sampler.LodBias;
          break;
 #if FEATURE_OES_draw_texture
       case GL_TEXTURE_CROP_RECT_OES:
@@ -1244,46 +1203,40 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
       case GL_TEXTURE_SWIZZLE_G_EXT:
       case GL_TEXTURE_SWIZZLE_B_EXT:
       case GL_TEXTURE_SWIZZLE_A_EXT:
-         if (ctx->Extensions.EXT_texture_swizzle) {
-            GLuint comp = pname - GL_TEXTURE_SWIZZLE_R_EXT;
-            *params = (GLfloat) obj->Swizzle[comp];
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.EXT_texture_swizzle)
+            goto invalid_pname;
+         *params = (GLfloat) obj->Swizzle[pname - GL_TEXTURE_SWIZZLE_R_EXT];
          break;
 
       case GL_TEXTURE_SWIZZLE_RGBA_EXT:
-         if (ctx->Extensions.EXT_texture_swizzle) {
+         if (!ctx->Extensions.EXT_texture_swizzle) {
+            goto invalid_pname;
+         }
+         else {
             GLuint comp;
             for (comp = 0; comp < 4; comp++) {
                params[comp] = (GLfloat) obj->Swizzle[comp];
             }
          }
-         else {
-            error = GL_TRUE;
-         }
          break;
 
       case GL_TEXTURE_CUBE_MAP_SEAMLESS:
-         if (ctx->Extensions.AMD_seamless_cubemap_per_texture) {
-            *params = (GLfloat) obj->Sampler.CubeMapSeamless;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.AMD_seamless_cubemap_per_texture)
+            goto invalid_pname;
+         *params = (GLfloat) obj->Sampler.CubeMapSeamless;
          break;
 
       default:
-	 error = GL_TRUE;
-	 break;
+         goto invalid_pname;
    }
 
-   if (error)
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameterfv(pname=0x%x)",
-		  pname);
-
+   /* no error if we get here */
    _mesa_unlock_texture(ctx, obj);
+   return;
+
+invalid_pname:
+   _mesa_unlock_texture(ctx, obj);
+   _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameterfv(pname=0x%x)", pname);
 }
 
 
@@ -1291,7 +1244,6 @@ void GLAPIENTRY
 _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
 {
    struct gl_texture_object *obj;
-   GLboolean error = GL_FALSE;
    GET_CURRENT_CONTEXT(ctx);
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
@@ -1355,55 +1307,37 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
          *params = obj->MaxLevel;
          break;;
       case GL_TEXTURE_MAX_ANISOTROPY_EXT:
-         if (ctx->Extensions.EXT_texture_filter_anisotropic) {
-            *params = (GLint) obj->Sampler.MaxAnisotropy;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.EXT_texture_filter_anisotropic)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.MaxAnisotropy;
          break;
       case GL_TEXTURE_COMPARE_FAIL_VALUE_ARB:
-         if (ctx->Extensions.ARB_shadow_ambient) {
-            *params = (GLint) FLOAT_TO_INT(obj->Sampler.CompareFailValue);
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.ARB_shadow_ambient)
+            goto invalid_pname;
+         *params = (GLint) FLOAT_TO_INT(obj->Sampler.CompareFailValue);
          break;
       case GL_GENERATE_MIPMAP_SGIS:
 	 *params = (GLint) obj->GenerateMipmap;
          break;
       case GL_TEXTURE_COMPARE_MODE_ARB:
-         if (ctx->Extensions.ARB_shadow) {
-            *params = (GLint) obj->Sampler.CompareMode;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.ARB_shadow)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.CompareMode;
          break;
       case GL_TEXTURE_COMPARE_FUNC_ARB:
-         if (ctx->Extensions.ARB_shadow) {
-            *params = (GLint) obj->Sampler.CompareFunc;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.ARB_shadow)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.CompareFunc;
          break;
       case GL_DEPTH_TEXTURE_MODE_ARB:
-         if (ctx->Extensions.ARB_depth_texture) {
-            *params = (GLint) obj->Sampler.DepthMode;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.ARB_depth_texture)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.DepthMode;
          break;
       case GL_TEXTURE_LOD_BIAS:
-         if (ctx->Extensions.EXT_texture_lod_bias) {
-            *params = (GLint) obj->Sampler.LodBias;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.EXT_texture_lod_bias)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.LodBias;
          break;
 #if FEATURE_OES_draw_texture
       case GL_TEXTURE_CROP_RECT_OES:
@@ -1417,42 +1351,34 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
       case GL_TEXTURE_SWIZZLE_G_EXT:
       case GL_TEXTURE_SWIZZLE_B_EXT:
       case GL_TEXTURE_SWIZZLE_A_EXT:
-         if (ctx->Extensions.EXT_texture_swizzle) {
-            GLuint comp = pname - GL_TEXTURE_SWIZZLE_R_EXT;
-            *params = obj->Swizzle[comp];
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.EXT_texture_swizzle)
+            goto invalid_pname;
+         *params = obj->Swizzle[pname - GL_TEXTURE_SWIZZLE_R_EXT];
          break;
 
       case GL_TEXTURE_SWIZZLE_RGBA_EXT:
-         if (ctx->Extensions.EXT_texture_swizzle) {
-            COPY_4V(params, obj->Swizzle);
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.EXT_texture_swizzle)
+            goto invalid_pname;
+         COPY_4V(params, obj->Swizzle);
          break;
 
       case GL_TEXTURE_CUBE_MAP_SEAMLESS:
-         if (ctx->Extensions.AMD_seamless_cubemap_per_texture) {
-            *params = (GLint) obj->Sampler.CubeMapSeamless;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.AMD_seamless_cubemap_per_texture)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.CubeMapSeamless;
          break;
 
       default:
-         ; /* silence warnings */
+         goto invalid_pname;
    }
 
-   if (error)
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameteriv(pname=0x%x)",
-		  pname);
-
+   /* no error if we get here */
    _mesa_unlock_texture(ctx, obj);
+   return;
+
+invalid_pname:
+   _mesa_unlock_texture(ctx, obj);
+   _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameteriv(pname=0x%x)", pname);
 }
 
 

From 1254a2b2e45c6961a57d9c60f561907183ef7de7 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 08:22:31 -0600
Subject: [PATCH 213/600] mesa: condense GL_TEXTURE_RESIDENT query code

---
 src/mesa/main/texparam.c | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index 78dcc5dccea..0dec0172989 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -1133,14 +1133,8 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
          }
          break;
       case GL_TEXTURE_RESIDENT:
-         {
-            GLboolean resident;
-            if (ctx->Driver.IsTextureResident)
-               resident = ctx->Driver.IsTextureResident(ctx, obj);
-            else
-               resident = GL_TRUE;
-            *params = ENUM_TO_FLOAT(resident);
-         }
+         *params = ctx->Driver.IsTextureResident ?
+            ctx->Driver.IsTextureResident(ctx, obj) : 1.0F;
          break;
       case GL_TEXTURE_PRIORITY:
          *params = obj->Priority;
@@ -1282,14 +1276,8 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
          }
          break;;
       case GL_TEXTURE_RESIDENT:
-         {
-            GLboolean resident;
-            if (ctx->Driver.IsTextureResident)
-               resident = ctx->Driver.IsTextureResident(ctx, obj);
-            else
-               resident = GL_TRUE;
-            *params = (GLint) resident;
-         }
+         *params = ctx->Driver.IsTextureResident ?
+            ctx->Driver.IsTextureResident(ctx, obj) : 1;
          break;;
       case GL_TEXTURE_PRIORITY:
          *params = FLOAT_TO_INT(obj->Priority);

From 02d81dfcaf073b5f7073d405e931b3d3e9f577ef Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 08:22:31 -0600
Subject: [PATCH 214/600] mesa: add null ptr checks in GetTexParameterI[u]iv()
 functions

---
 src/mesa/main/texparam.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index 0dec0172989..97d0359f170 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -1379,6 +1379,8 @@ _mesa_GetTexParameterIiv(GLenum target, GLenum pname, GLint *params)
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
    texObj = get_texobj(ctx, target, GL_TRUE);
+   if (!texObj)
+      return;
    
    switch (pname) {
    case GL_TEXTURE_BORDER_COLOR:
@@ -1399,6 +1401,8 @@ _mesa_GetTexParameterIuiv(GLenum target, GLenum pname, GLuint *params)
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
    texObj = get_texobj(ctx, target, GL_TRUE);
+   if (!texObj)
+      return;
    
    switch (pname) {
    case GL_TEXTURE_BORDER_COLOR:

From 1e89a526c6cd21852b440904711c5ee733ce1ad2 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 08:22:31 -0600
Subject: [PATCH 215/600] mesa: whitespace, formatting fixes in
 GetTexParameter() code

---
 src/mesa/main/texparam.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index 97d0359f170..bbbb306b2d9 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -1115,17 +1115,15 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
          *params = ENUM_TO_FLOAT(obj->Sampler.WrapR);
          break;
       case GL_TEXTURE_BORDER_COLOR:
-         if(ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP))
+         if (ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP))
             _mesa_update_state_locked(ctx);
-         if(ctx->Color._ClampFragmentColor)
-         {
+         if (ctx->Color._ClampFragmentColor) {
             params[0] = CLAMP(obj->Sampler.BorderColor.f[0], 0.0F, 1.0F);
             params[1] = CLAMP(obj->Sampler.BorderColor.f[1], 0.0F, 1.0F);
             params[2] = CLAMP(obj->Sampler.BorderColor.f[2], 0.0F, 1.0F);
             params[3] = CLAMP(obj->Sampler.BorderColor.f[3], 0.0F, 1.0F);
          }
-         else
-         {
+         else {
             params[0] = obj->Sampler.BorderColor.f[0];
             params[1] = obj->Sampler.BorderColor.f[1];
             params[2] = obj->Sampler.BorderColor.f[2];
@@ -1241,9 +1239,9 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
    GET_CURRENT_CONTEXT(ctx);
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
-    obj = get_texobj(ctx, target, GL_TRUE);
-    if (!obj)
-       return;
+   obj = get_texobj(ctx, target, GL_TRUE);
+   if (!obj)
+      return;
 
    _mesa_lock_texture(ctx, obj);
    switch (pname) {

From 192baaac0fc4701e82dcc3e19b3033f81dd82a62 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 08:22:31 -0600
Subject: [PATCH 216/600] mesa: minor comment updates in enable.c

---
 src/mesa/main/enable.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
index aac8b9c5eaf..3ba4df6342f 100644
--- a/src/mesa/main/enable.c
+++ b/src/mesa/main/enable.c
@@ -5,7 +5,6 @@
 
 /*
  * Mesa 3-D graphics library
- * Version:  7.0.3
  *
  * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
  *
@@ -560,7 +559,6 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state)
          ctx->Polygon.OffsetLine = state;
          break;
       case GL_POLYGON_OFFSET_FILL:
-         /*case GL_POLYGON_OFFSET_EXT:*/
          if (ctx->Polygon.OffsetFill == state)
             return;
          FLUSH_VERTICES(ctx, _NEW_POLYGON);
@@ -643,9 +641,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state)
          break;
 #endif
 
-      /*
-       * CLIENT STATE!!!
-       */
+      /* client-side state */
       case GL_VERTEX_ARRAY:
       case GL_NORMAL_ARRAY:
       case GL_COLOR_ARRAY:
@@ -1174,7 +1170,6 @@ _mesa_IsEnabled( GLenum cap )
       case GL_POLYGON_OFFSET_LINE:
 	 return ctx->Polygon.OffsetLine;
       case GL_POLYGON_OFFSET_FILL:
-      /*case GL_POLYGON_OFFSET_EXT:*/
 	 return ctx->Polygon.OffsetFill;
       case GL_RESCALE_NORMAL_EXT:
          return ctx->Transform.RescaleNormals;
@@ -1213,9 +1208,7 @@ _mesa_IsEnabled( GLenum cap )
          }
 #endif
 
-      /*
-       * CLIENT STATE!!!
-       */
+      /* client-side state */
       case GL_VERTEX_ARRAY:
          return (ctx->Array.ArrayObj->Vertex.Enabled != 0);
       case GL_NORMAL_ARRAY:

From 09ba2527e885f6134002205716a44d01d83638c3 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 08:22:31 -0600
Subject: [PATCH 217/600] st/mesa: move declaration before code

---
 src/mesa/state_tracker/st_cb_blit.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c
index 276d10fb557..626db12431d 100644
--- a/src/mesa/state_tracker/st_cb_blit.c
+++ b/src/mesa/state_tracker/st_cb_blit.c
@@ -90,13 +90,14 @@ st_BlitFramebuffer_resolve(struct gl_context *ctx,
    if (mask & depthStencil) {
       struct gl_renderbuffer_attachment *srcDepth, *srcStencil;
       struct gl_renderbuffer_attachment *dstDepth, *dstStencil;
+      boolean combined;
 
       srcDepth = &ctx->ReadBuffer->Attachment[BUFFER_DEPTH];
       dstDepth = &ctx->DrawBuffer->Attachment[BUFFER_DEPTH];
       srcStencil = &ctx->ReadBuffer->Attachment[BUFFER_STENCIL];
       dstStencil = &ctx->DrawBuffer->Attachment[BUFFER_STENCIL];
 
-      const boolean combined =
+      combined =
          st_is_depth_stencil_combined(srcDepth, srcStencil) &&
          st_is_depth_stencil_combined(dstDepth, dstStencil);
 

From 50073563b2bfe3716b3dc8b1ed2f91381ba24305 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 08:22:31 -0600
Subject: [PATCH 218/600] st/mesa: silence int/float and double/float
 conversion warnings

---
 src/mesa/state_tracker/st_cb_bitmap.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index 49b196032b9..067403f396b 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -328,8 +328,8 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized,
 
    if(!normalized)
    {
-      sRight = width;
-      tBot = height;
+      sRight = (GLfloat) width;
+      tBot = (GLfloat) height;
    }
 
    /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as
@@ -381,7 +381,7 @@ setup_bitmap_vertex_data(struct st_context *st, bool normalized,
    /* same for all verts: */
    for (i = 0; i < 4; i++) {
       st->bitmap.vertices[i][0][2] = z;
-      st->bitmap.vertices[i][0][3] = 1.0;
+      st->bitmap.vertices[i][0][3] = 1.0f;
       st->bitmap.vertices[i][1][0] = color[0];
       st->bitmap.vertices[i][1][1] = color[1];
       st->bitmap.vertices[i][1][2] = color[2];
@@ -513,7 +513,7 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
    cso_set_vertex_elements(cso, 3, st->velems_util_draw);
 
    /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
-   z = z * 2.0 - 1.0;
+   z = z * 2.0f - 1.0f;
 
    /* draw textured quad */
    offset = setup_bitmap_vertex_data(st,

From b7e89115310628310bf458a33f2df2bf23384cf3 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Tue, 2 Aug 2011 11:36:44 -0500
Subject: [PATCH 219/600] glsl_to_tgsi: remove debugging printf

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index ba4074eecd5..b5f4253ea64 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -840,7 +840,6 @@ glsl_to_tgsi_visitor::add_constant(gl_register_file file,
       int index = 0;
       immediate_storage *entry;
       assert(file == PROGRAM_IMMEDIATE);
-      fprintf(stderr, "adding immediate\n");
 
       /* Search immediate storage to see if we already have an identical
        * immediate that we can use instead of adding a duplicate entry.

From 9adcab9cd464d659288e31e6767efb5dee3894ff Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Thu, 4 Aug 2011 10:15:54 -0500
Subject: [PATCH 220/600] st/mesa: replace duplicated
 create_color_map_texture() function with shared function

---
 .../state_tracker/st_atom_pixeltransfer.c     | 22 +-----------------
 src/mesa/state_tracker/st_cb_drawpixels.c     | 23 +------------------
 src/mesa/state_tracker/st_texture.c           | 20 ++++++++++++++++
 src/mesa/state_tracker/st_texture.h           |  4 ++++
 4 files changed, 26 insertions(+), 43 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c
index 95b706cb96c..12b5bc5ba79 100644
--- a/src/mesa/state_tracker/st_atom_pixeltransfer.c
+++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c
@@ -84,26 +84,6 @@ make_state_key(struct gl_context *ctx,  struct state_key *key)
 }
 
 
-static struct pipe_resource *
-create_color_map_texture(struct gl_context *ctx)
-{
-   struct st_context *st = st_context(ctx);
-   struct pipe_context *pipe = st->pipe;
-   struct pipe_resource *pt;
-   enum pipe_format format;
-   const uint texSize = 256; /* simple, and usually perfect */
-
-   /* find an RGBA texture format */
-   format = st_choose_format(pipe->screen, GL_RGBA,
-                             PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW);
-
-   /* create texture for color map/table */
-   pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0,
-                          texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW);
-   return pt;
-}
-
-
 /**
  * Update the pixelmap texture with the contents of the R/G/B/A pixel maps.
  */
@@ -219,7 +199,7 @@ get_pixel_transfer_program(struct gl_context *ctx, const struct state_key *key)
 
       /* create the colormap/texture now if not already done */
       if (!st->pixel_xfer.pixelmap_texture) {
-         st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx);
+         st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx);
          st->pixel_xfer.pixelmap_sampler_view =
             st_create_texture_sampler_view(st->pipe,
                                            st->pixel_xfer.pixelmap_texture);
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index f4dd2a42847..0c4dc23ccf7 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -94,27 +94,6 @@ is_passthrough_program(const struct gl_fragment_program *prog)
 }
 
 
-/* XXX copied verbatim from st_atom_pixeltransfer.c */
-static struct pipe_resource *
-create_color_map_texture(struct gl_context *ctx)
-{
-   struct st_context *st = st_context(ctx);
-   struct pipe_context *pipe = st->pipe;
-   struct pipe_resource *pt;
-   enum pipe_format format;
-   const uint texSize = 256; /* simple, and usually perfect */
-
-   /* find an RGBA texture format */
-   format = st_choose_format(pipe->screen, GL_RGBA,
-                             PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW);
-
-   /* create texture for color map/table */
-   pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0,
-                          texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW);
-   return pt;
-}
-
-
 /**
  * Returns a fragment program which implements the current pixel transfer ops.
  */
@@ -142,7 +121,7 @@ get_glsl_pixel_transfer_program(struct st_context *st,
    if (pixelMaps) {
       /* create the colormap/texture now if not already done */
       if (!st->pixel_xfer.pixelmap_texture) {
-         st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx);
+         st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx);
          st->pixel_xfer.pixelmap_sampler_view =
             st_create_texture_sampler_view(st->pipe,
                                            st->pixel_xfer.pixelmap_texture);
diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c
index ffe7e256a56..d8ba3ac9252 100644
--- a/src/mesa/state_tracker/st_texture.c
+++ b/src/mesa/state_tracker/st_texture.c
@@ -396,3 +396,23 @@ st_texture_image_copy(struct pipe_context *pipe,
    }
 }
 
+
+struct pipe_resource *
+st_create_color_map_texture(struct gl_context *ctx)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_resource *pt;
+   enum pipe_format format;
+   const uint texSize = 256; /* simple, and usually perfect */
+
+   /* find an RGBA texture format */
+   format = st_choose_format(pipe->screen, GL_RGBA,
+                             PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW);
+
+   /* create texture for color map/table */
+   pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0,
+                          texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW);
+   return pt;
+}
+
diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h
index d50c3c9af79..b822f47cf9e 100644
--- a/src/mesa/state_tracker/st_texture.h
+++ b/src/mesa/state_tracker/st_texture.h
@@ -232,4 +232,8 @@ st_texture_image_copy(struct pipe_context *pipe,
                       struct pipe_resource *src, GLuint srcLevel,
                       GLuint face);
 
+
+extern struct pipe_resource *
+st_create_color_map_texture(struct gl_context *ctx);
+
 #endif

From d6a0692f9dc055c5e5f0e7c806537ad24aa13709 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 13:07:50 -0600
Subject: [PATCH 221/600] mesa: don't use K&R style function pointer calls

---
 src/mesa/main/texobj.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c
index 3021716a0b6..078a43ab153 100644
--- a/src/mesa/main/texobj.c
+++ b/src/mesa/main/texobj.c
@@ -842,7 +842,7 @@ _mesa_GenTextures( GLsizei n, GLuint *textures )
       struct gl_texture_object *texObj;
       GLuint name = first + i;
       GLenum target = 0;
-      texObj = (*ctx->Driver.NewTextureObject)( ctx, name, target);
+      texObj = ctx->Driver.NewTextureObject(ctx, name, target);
       if (!texObj) {
          _glthread_UNLOCK_MUTEX(ctx->Shared->Mutex);
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGenTextures");
@@ -1066,7 +1066,7 @@ _mesa_BindTexture( GLenum target, GLuint texName )
       }
       else {
          /* if this is a new texture id, allocate a texture object now */
-         newTexObj = (*ctx->Driver.NewTextureObject)(ctx, texName, target);
+         newTexObj = ctx->Driver.NewTextureObject(ctx, texName, target);
          if (!newTexObj) {
             _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBindTexture");
             return;
@@ -1108,7 +1108,7 @@ _mesa_BindTexture( GLenum target, GLuint texName )
 
    /* Pass BindTexture call to device driver */
    if (ctx->Driver.BindTexture)
-      (*ctx->Driver.BindTexture)( ctx, target, newTexObj );
+      ctx->Driver.BindTexture(ctx, target, newTexObj);
 }
 
 

From 1c8d079e205919b24e04efdc2421c18d03f078ff Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 15:32:09 -0600
Subject: [PATCH 222/600] mesa: fix out of bounds array access in rtgc debug
 code

Fixes https://bugs.freedesktop.org/show_bug.cgi?id=39841
This would only be hit if someone set RGTC_DEBUG=1.
---
 src/mesa/main/texcompress_rgtc_tmp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/texcompress_rgtc_tmp.h b/src/mesa/main/texcompress_rgtc_tmp.h
index c8bf082a158..48bbd374e08 100644
--- a/src/mesa/main/texcompress_rgtc_tmp.h
+++ b/src/mesa/main/texcompress_rgtc_tmp.h
@@ -181,7 +181,7 @@ static void TAG(encode_rgtc_chan)(TYPE *blkaddr, TYPE srccolors[4][4],
       fprintf(stderr, "%d ", alphaenc1[i]);
    }
    fprintf(stderr, "cutVals ");
-   for (i = 0; i < 8; i++) {
+   for (i = 0; i < 7; i++) {
       fprintf(stderr, "%d ", acutValues[i]);
    }
    fprintf(stderr, "srcVals ");

From d7f2e38fca38a5521e930242be46be5a70a9cbd3 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 15:55:13 -0600
Subject: [PATCH 223/600] mesa: add st_glsl_to_tgsi.cpp to Sconscript

---
 src/mesa/SConscript | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index 24e2155c387..cbd16625186 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -264,6 +264,7 @@ statetracker_sources = [
     'state_tracker/st_draw_feedback.c',
     'state_tracker/st_extensions.c',
     'state_tracker/st_format.c',
+    'state_tracker/st_glsl_to_tgsi.cpp',
     'state_tracker/st_gen_mipmap.c',
     'state_tracker/st_manager.c',
     'state_tracker/st_mesa_to_tgsi.c',

From a0eb83401ef599e597b72e70c8856e1bc0f59dcc Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 15:55:50 -0600
Subject: [PATCH 224/600] mesa: use gl_constant_value type in
 _mesa_[Get]ProgramNamedParameter4fNV()

---
 src/mesa/main/nvprogram.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/mesa/main/nvprogram.c b/src/mesa/main/nvprogram.c
index dd198b8141a..7ff7645b7b7 100644
--- a/src/mesa/main/nvprogram.c
+++ b/src/mesa/main/nvprogram.c
@@ -812,7 +812,7 @@ _mesa_ProgramNamedParameter4fNV(GLuint id, GLsizei len, const GLubyte *name,
 {
    struct gl_program *prog;
    struct gl_fragment_program *fragProg;
-   GLfloat *v;
+   gl_constant_value *v;
 
    GET_CURRENT_CONTEXT(ctx);
    ASSERT_OUTSIDE_BEGIN_END(ctx);
@@ -834,10 +834,10 @@ _mesa_ProgramNamedParameter4fNV(GLuint id, GLsizei len, const GLubyte *name,
    v = _mesa_lookup_parameter_value(fragProg->Base.Parameters, len,
                                     (char *) name);
    if (v) {
-      v[0] = x;
-      v[1] = y;
-      v[2] = z;
-      v[3] = w;
+      v[0].f = x;
+      v[1].f = y;
+      v[2].f = z;
+      v[3].f = w;
       return;
    }
 
@@ -878,7 +878,7 @@ _mesa_GetProgramNamedParameterfvNV(GLuint id, GLsizei len, const GLubyte *name,
 {
    struct gl_program *prog;
    struct gl_fragment_program *fragProg;
-   const GLfloat *v;
+   const gl_constant_value *v;
 
    GET_CURRENT_CONTEXT(ctx);
 
@@ -899,10 +899,10 @@ _mesa_GetProgramNamedParameterfvNV(GLuint id, GLsizei len, const GLubyte *name,
    v = _mesa_lookup_parameter_value(fragProg->Base.Parameters,
                                     len, (char *) name);
    if (v) {
-      params[0] = v[0];
-      params[1] = v[1];
-      params[2] = v[2];
-      params[3] = v[3];
+      params[0] = v[0].f;
+      params[1] = v[1].f;
+      params[2] = v[2].f;
+      params[3] = v[3].f;
       return;
    }
 

From 324857599b2a4735c86e54da9a1776c034dadf72 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 16:00:06 -0600
Subject: [PATCH 225/600] mesa: use gl_constant_value type in ARB program
 parser

---
 src/mesa/program/program_parse.y  | 56 +++++++++++++++----------------
 src/mesa/program/program_parser.h |  3 +-
 2 files changed, 30 insertions(+), 29 deletions(-)

diff --git a/src/mesa/program/program_parse.y b/src/mesa/program/program_parse.y
index dbf5abaa617..dec35038be5 100644
--- a/src/mesa/program/program_parse.y
+++ b/src/mesa/program/program_parse.y
@@ -1854,64 +1854,64 @@ paramConstUse: paramConstScalarUse | paramConstVector;
 paramConstScalarDecl: signedFloatConstant
 	{
 	   $$.count = 4;
-	   $$.data[0] = $1;
-	   $$.data[1] = $1;
-	   $$.data[2] = $1;
-	   $$.data[3] = $1;
+	   $$.data[0].f = $1;
+	   $$.data[1].f = $1;
+	   $$.data[2].f = $1;
+	   $$.data[3].f = $1;
 	}
 	;
 
 paramConstScalarUse: REAL
 	{
 	   $$.count = 1;
-	   $$.data[0] = $1;
-	   $$.data[1] = $1;
-	   $$.data[2] = $1;
-	   $$.data[3] = $1;
+	   $$.data[0].f = $1;
+	   $$.data[1].f = $1;
+	   $$.data[2].f = $1;
+	   $$.data[3].f = $1;
 	}
 	| INTEGER
 	{
 	   $$.count = 1;
-	   $$.data[0] = (float) $1;
-	   $$.data[1] = (float) $1;
-	   $$.data[2] = (float) $1;
-	   $$.data[3] = (float) $1;
+	   $$.data[0].f = (float) $1;
+	   $$.data[1].f = (float) $1;
+	   $$.data[2].f = (float) $1;
+	   $$.data[3].f = (float) $1;
 	}
 	;
 
 paramConstVector: '{' signedFloatConstant '}'
 	{
 	   $$.count = 4;
-	   $$.data[0] = $2;
-	   $$.data[1] = 0.0f;
-	   $$.data[2] = 0.0f;
-	   $$.data[3] = 1.0f;
+	   $$.data[0].f = $2;
+	   $$.data[1].f = 0.0f;
+	   $$.data[2].f = 0.0f;
+	   $$.data[3].f = 1.0f;
 	}
 	| '{' signedFloatConstant ',' signedFloatConstant '}'
 	{
 	   $$.count = 4;
-	   $$.data[0] = $2;
-	   $$.data[1] = $4;
-	   $$.data[2] = 0.0f;
-	   $$.data[3] = 1.0f;
+	   $$.data[0].f = $2;
+	   $$.data[1].f = $4;
+	   $$.data[2].f = 0.0f;
+	   $$.data[3].f = 1.0f;
 	}
 	| '{' signedFloatConstant ',' signedFloatConstant ','
               signedFloatConstant '}'
 	{
 	   $$.count = 4;
-	   $$.data[0] = $2;
-	   $$.data[1] = $4;
-	   $$.data[2] = $6;
-	   $$.data[3] = 1.0f;
+	   $$.data[0].f = $2;
+	   $$.data[1].f = $4;
+	   $$.data[2].f = $6;
+	   $$.data[3].f = 1.0f;
 	}
 	| '{' signedFloatConstant ',' signedFloatConstant ','
               signedFloatConstant ',' signedFloatConstant '}'
 	{
 	   $$.count = 4;
-	   $$.data[0] = $2;
-	   $$.data[1] = $4;
-	   $$.data[2] = $6;
-	   $$.data[3] = $8;
+	   $$.data[0].f = $2;
+	   $$.data[1].f = $4;
+	   $$.data[2].f = $6;
+	   $$.data[3].f = $8;
 	}
 	;
 
diff --git a/src/mesa/program/program_parser.h b/src/mesa/program/program_parser.h
index 8e5aaee95e5..5637598f3b3 100644
--- a/src/mesa/program/program_parser.h
+++ b/src/mesa/program/program_parser.h
@@ -23,6 +23,7 @@
 #pragma once
 
 #include "main/config.h"
+#include "program/prog_parameter.h"
 
 struct gl_context;
 
@@ -96,7 +97,7 @@ struct asm_symbol {
 
 struct asm_vector {
    unsigned count;
-   float    data[4];
+   gl_constant_value data[4];
 };
 
 

From bf8d06c518a8e17e485b18ba03be3e1b45cc7327 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 4 Aug 2011 16:01:27 -0600
Subject: [PATCH 226/600] mesa: pass correct constant type to
 _mesa_fetch_state()

Fixes assorted warnings about float vs. gl_constant_value pointers.
---
 src/mesa/program/prog_statevars.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c
index 16f9690e865..6aa2409e85e 100644
--- a/src/mesa/program/prog_statevars.c
+++ b/src/mesa/program/prog_statevars.c
@@ -1111,7 +1111,7 @@ _mesa_load_state_parameters(struct gl_context *ctx,
       if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) {
          _mesa_fetch_state(ctx,
 			   paramList->Parameters[i].StateIndexes,
-                           paramList->ParameterValues[i]);
+                           &paramList->ParameterValues[i][0].f);
       }
    }
 }

From a48118e510fcbb57634a7869cb628123fa8c3f2e Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Thu, 4 Aug 2011 18:04:44 -0700
Subject: [PATCH 227/600] mesa: Remove MSVC stdint typedefs from compiler.h.

MSVC can now include the stdint.h at include/c99/stdint.h.
---
 src/mesa/main/compiler.h | 24 +-----------------------
 1 file changed, 1 insertion(+), 23 deletions(-)

diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h
index d736fdfc58a..ee7d0b2f880 100644
--- a/src/mesa/main/compiler.h
+++ b/src/mesa/main/compiler.h
@@ -60,29 +60,7 @@ extern "C" {
 /**
  * Get standard integer types
  */
-#if defined(_MSC_VER)
-   typedef __int8             int8_t;
-   typedef unsigned __int8    uint8_t;
-   typedef __int16            int16_t;
-   typedef unsigned __int16   uint16_t;
-   typedef __int32            int32_t;
-   typedef unsigned __int32   uint32_t;
-   typedef __int64            int64_t;
-   typedef unsigned __int64   uint64_t;
-
-#  if defined(_WIN64)
-     typedef __int64            intptr_t;
-     typedef unsigned __int64   uintptr_t;
-#  else
-     typedef __int32            intptr_t;
-     typedef unsigned __int32   uintptr_t;
-#  endif
-
-#  define INT64_C(__val) __val##i64
-#  define UINT64_C(__val) __val##ui64
-#else
-#  include <stdint.h>
-#endif
+#include <stdint.h>
 
 
 /**

From 547212d963c70161915c46d64e8020617199fb8d Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Thu, 4 Aug 2011 00:39:07 +0900
Subject: [PATCH 228/600] glsl: empty declarations should be valid

Unlike C++, empty declarations such as

  float;

should be valid.  The spec is not explicit about this actually.

Some apps that generate their shader sources may rely on this.  This was
noted when porting one of them to Linux from Windows.

Reviewed-by: Chad Versace <chad@chad-versace.us>

Note: this is a candidate for the 7.11 branch.
---
 src/glsl/ast_to_hir.cpp | 10 +++++-----
 src/glsl/glsl_parser.yy | 10 +++-------
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index c0524bf0bcc..7da14611950 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2399,12 +2399,12 @@ ast_declarator_list::hir(exec_list *instructions,
 
    decl_type = this->type->specifier->glsl_type(& type_name, state);
    if (this->declarations.is_empty()) {
-      /* The only valid case where the declaration list can be empty is when
-       * the declaration is setting the default precision of a built-in type
-       * (e.g., 'precision highp vec4;').
-       */
-
       if (decl_type != NULL) {
+	 /* Warn if this empty declaration is not for declaring a structure.
+	  */
+	 if (this->type->specifier->structure == NULL) {
+	    _mesa_glsl_warning(&loc, state, "empty declaration");
+	 }
       } else {
 	    _mesa_glsl_error(& loc, state, "incomplete declaration");
       }
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 2c0498ece7a..1851f1e202e 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -971,13 +971,9 @@ single_declaration:
 	fully_specified_type
 	{
 	   void *ctx = state;
-	   if ($1->specifier->type_specifier != ast_struct) {
-	      _mesa_glsl_error(& @1, state, "empty declaration list\n");
-	      YYERROR;
-	   } else {
-	      $$ = new(ctx) ast_declarator_list($1);
-	      $$->set_location(yylloc);
-	   }
+	   /* Empty declaration list is valid. */
+	   $$ = new(ctx) ast_declarator_list($1);
+	   $$->set_location(yylloc);
 	}
 	| fully_specified_type any_identifier
 	{

From c251d83d916336f95109363e919920a024947230 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Thu, 4 Aug 2011 07:38:13 +0200
Subject: [PATCH 229/600] vbo: do not call _mesa_max_buffer_index in debug
 builds

That code drops performance in Unigine Heaven and Tropics
by a factor of 10. That's too crazy even for a debug build.

NOTE: This is a candidate for the 7.11 branch.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/vbo/vbo_exec_array.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index b908d5aea7e..32ce0e4a8ff 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -909,11 +909,10 @@ vbo_exec_DrawRangeElementsBaseVertex(GLenum mode,
       if (0)
          _mesa_print_arrays(ctx);
 
-#ifdef DEBUG
       /* 'end' was out of bounds, but now let's check the actual array
        * indexes to see if any of them are out of bounds.
        */
-      {
+      if (0) {
          GLuint max = _mesa_max_buffer_index(ctx, count, type, indices,
                                              ctx->Array.ElementArrayBufferObj);
          if (max >= ctx->Array.ArrayObj->_MaxElement) {
@@ -934,7 +933,6 @@ vbo_exec_DrawRangeElementsBaseVertex(GLenum mode,
           * upper bound wrong.
           */
       }
-#endif
 
       /* Set 'end' to the max possible legal value */
       assert(ctx->Array.ArrayObj->_MaxElement >= 1);

From 6b3bbf52b884ef4b5f0049623ec7154dd3c1dc31 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Fri, 5 Aug 2011 06:03:18 +0200
Subject: [PATCH 230/600] r300g: adapt to the resource_resolve interface change

---
 src/gallium/drivers/r300/r300_render.c | 43 +++++++++++++-------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index b31141a518e..d69b4cf4275 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -1267,33 +1267,31 @@ done:
     r300->sprite_coord_enable = last_sprite_coord_enable;
 }
 
-static void r300_resource_resolve(struct pipe_context* pipe,
-                                  struct pipe_resource* dest,
-                                  unsigned dst_layer,
-                                  struct pipe_resource* src,
-                                  unsigned src_layer)
+static void r300_resource_resolve(struct pipe_context *pipe,
+                                  const struct pipe_resolve_info *info)
 {
-    struct r300_context* r300 = r300_context(pipe);
-    struct pipe_surface* srcsurf, surf_tmpl;
+    struct r300_context *r300 = r300_context(pipe);
+    struct pipe_surface *srcsurf, *dstsurf, surf_tmpl;
     struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;
     float color[] = {0, 0, 0, 0};
 
     memset(&surf_tmpl, 0, sizeof(surf_tmpl));
-    surf_tmpl.format = src->format;
-    surf_tmpl.usage = 0; /* not really a surface hence no bind flags */
-    surf_tmpl.u.tex.level = 0; /* msaa resources cannot have mipmaps */
-    surf_tmpl.u.tex.first_layer = src_layer;
-    surf_tmpl.u.tex.last_layer = src_layer;
-    srcsurf = pipe->create_surface(pipe, src, &surf_tmpl);
-    surf_tmpl.format = dest->format;
-    surf_tmpl.u.tex.first_layer = dst_layer;
-    surf_tmpl.u.tex.last_layer = dst_layer;
+    surf_tmpl.format = info->src.res->format;
+    surf_tmpl.u.tex.first_layer =
+    surf_tmpl.u.tex.last_layer = info->src.layer;
+    srcsurf = pipe->create_surface(pipe, info->src.res, &surf_tmpl);
+    /* XXX Offset both surfaces by x0,y1. */
+
+    surf_tmpl.format = info->dst.res->format;
+    surf_tmpl.u.tex.level = info->dst.level;
+    surf_tmpl.u.tex.first_layer =
+    surf_tmpl.u.tex.last_layer = info->dst.layer;
+    dstsurf = pipe->create_surface(pipe, info->dst.res, &surf_tmpl);
 
     DBG(r300, DBG_DRAW, "r300: Resolving resource...\n");
 
     /* Enable AA resolve. */
-    aa->dest = r300_surface(pipe->create_surface(pipe, dest, &surf_tmpl));
-
+    aa->dest = r300_surface(dstsurf);
     aa->aaresolve_ctl =
         R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE |
         R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE;
@@ -1301,16 +1299,19 @@ static void r300_resource_resolve(struct pipe_context* pipe,
     r300_mark_atom_dirty(r300, &r300->aa_state);
 
     /* Resolve the surface. */
+    /* XXX: y1 < 0 ==> Y flip */
     r300->context.clear_render_target(pipe,
-        srcsurf, color, 0, 0, src->width0, src->height0);
+                                      srcsurf, color, 0, 0,
+                                      info->dst.x1 - info->dst.x0,
+                                      info->dst.y1 - info->dst.y0);
 
     /* Disable AA resolve. */
     aa->aaresolve_ctl = 0;
     r300->aa_state.size = 4;
     r300_mark_atom_dirty(r300, &r300->aa_state);
 
-    pipe_surface_reference((struct pipe_surface**)&srcsurf, NULL);
-    pipe_surface_reference((struct pipe_surface**)&aa->dest, NULL);
+    pipe_surface_reference(&srcsurf, NULL);
+    pipe_surface_reference(&dstsurf, NULL);
 }
 
 void r300_init_render_functions(struct r300_context *r300)

From d99c8e191b9dd206eae42ffab1ade01054026ebc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Fri, 5 Aug 2011 06:04:05 +0200
Subject: [PATCH 231/600] r300g: handle new CAPs

---
 src/gallium/drivers/r300/r300_screen.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 47de4005c37..674bd24953c 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -142,6 +142,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
         case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
         case PIPE_CAP_SEAMLESS_CUBE_MAP:
         case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+        case PIPE_CAP_SCALED_RESOLVE:
             return 0;
 
         /* SWTCL-only features. */
@@ -211,13 +212,12 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
         case PIPE_SHADER_CAP_MAX_PREDS:
             return is_r500 ? 1 : 0;
         case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
-            return 0;
         case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
         case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
         case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
         case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
-            return 0;
         case PIPE_SHADER_CAP_SUBROUTINES:
+        case PIPE_SHADER_CAP_INTEGERS:
             return 0;
         }
         break;
@@ -248,20 +248,15 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
             return 1; /* XXX guessed */
         case PIPE_SHADER_CAP_MAX_PREDS:
             return is_r500 ? 4 : 0; /* XXX guessed. */
+        case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+            return 1;
         case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
-            return 0;
         case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
         case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
         case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
-            return 0;
-        case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
-            return 1;
         case PIPE_SHADER_CAP_SUBROUTINES:
-            return 0;
         case PIPE_SHADER_CAP_INTEGERS:
             return 0;
-        default:
-            break;
         }
         break;
     default:

From 6e7942936c5de59f509779b6f7620d80d2fbc21a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Fri, 5 Aug 2011 06:57:07 +0200
Subject: [PATCH 232/600] st/mesa: remove unused-but-set variables in
 st_glsl_to_tgsi.cpp

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 9c6a7ed738a..460bafb3821 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1725,7 +1725,6 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
    if (index) {
       src.index += index->value.i[0] * element_size;
    } else {
-      st_src_reg array_base = this->result;
       /* Variable index array dereference.  It eats the "vec4" of the
        * base of the array and an index that offsets the TGSI register
        * index.
@@ -2463,7 +2462,7 @@ glsl_to_tgsi_visitor::visit(ir_discard *ir)
 void
 glsl_to_tgsi_visitor::visit(ir_if *ir)
 {
-   glsl_to_tgsi_instruction *cond_inst, *if_inst, *else_inst = NULL;
+   glsl_to_tgsi_instruction *cond_inst, *if_inst;
    glsl_to_tgsi_instruction *prev_inst;
 
    prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
@@ -2495,7 +2494,7 @@ glsl_to_tgsi_visitor::visit(ir_if *ir)
    visit_exec_list(&ir->then_instructions, this);
 
    if (!ir->else_instructions.is_empty()) {
-      else_inst = emit(ir->condition, TGSI_OPCODE_ELSE);
+      emit(ir->condition, TGSI_OPCODE_ELSE);
       visit_exec_list(&ir->else_instructions, this);
    }
 

From a3cde50effbc469379bf5d4d69d03464de43fb29 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Fri, 5 Aug 2011 07:02:25 +0200
Subject: [PATCH 233/600] st/dri: remove a dummy function dri2_create_context

It does nothing besides calling dri_create_context with the same parameters.
---
 src/gallium/state_trackers/dri/drm/dri2.c | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c
index fe4ddb312be..5344775f96f 100644
--- a/src/gallium/state_trackers/dri/drm/dri2.c
+++ b/src/gallium/state_trackers/dri/drm/dri2.c
@@ -661,20 +661,6 @@ fail:
    return NULL;
 }
 
-static boolean
-dri2_create_context(gl_api api, const struct gl_config * visual,
-                    __DRIcontext * cPriv, void *sharedContextPrivate)
-{
-   struct dri_context *ctx = NULL;
-
-   if (!dri_create_context(api, visual, cPriv, sharedContextPrivate))
-      return FALSE;
-
-   ctx = cPriv->driverPrivate;
-
-   return TRUE;
-}
-
 static boolean
 dri2_create_buffer(__DRIscreen * sPriv,
                    __DRIdrawable * dPriv,
@@ -702,7 +688,7 @@ const struct __DriverAPIRec driDriverAPI = {
    .InitScreen = NULL,
    .InitScreen2 = dri2_init_screen,
    .DestroyScreen = dri_destroy_screen,
-   .CreateContext = dri2_create_context,
+   .CreateContext = dri_create_context,
    .DestroyContext = dri_destroy_context,
    .CreateBuffer = dri2_create_buffer,
    .DestroyBuffer = dri_destroy_buffer,

From 115651241b7f04e7ec274c267e7de5d1c8fb8c9f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Fri, 5 Aug 2011 07:07:46 +0200
Subject: [PATCH 234/600] st/dri: remove an unused-but-set variable

---
 src/gallium/state_trackers/dri/drm/dri2.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c
index 5344775f96f..d491e46ab16 100644
--- a/src/gallium/state_trackers/dri/drm/dri2.c
+++ b/src/gallium/state_trackers/dri/drm/dri2.c
@@ -266,7 +266,6 @@ dri2_allocate_buffer(__DRIscreen *sPriv,
    struct dri_screen *screen = dri_screen(sPriv);
    struct dri2_buffer *buffer;
    struct pipe_resource templ;
-   enum st_attachment_type statt;
    enum pipe_format pf;
    unsigned bind = 0;
    struct winsys_handle whandle;
@@ -274,22 +273,16 @@ dri2_allocate_buffer(__DRIscreen *sPriv,
    switch (attachment) {
       case __DRI_BUFFER_FRONT_LEFT:
       case __DRI_BUFFER_FAKE_FRONT_LEFT:
-         statt = ST_ATTACHMENT_FRONT_LEFT;
          bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
          break;
       case __DRI_BUFFER_BACK_LEFT:
-         statt = ST_ATTACHMENT_BACK_LEFT;
          bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
          break;
       case __DRI_BUFFER_DEPTH:
       case __DRI_BUFFER_DEPTH_STENCIL:
       case __DRI_BUFFER_STENCIL:
-            statt = ST_ATTACHMENT_DEPTH_STENCIL;
             bind = PIPE_BIND_DEPTH_STENCIL; /* XXX sampler? */
          break;
-      default:
-         statt = ST_ATTACHMENT_INVALID;
-         break;
    }
 
    switch (format) {

From 64ab39b035f755510a644643b96451431bbe5f27 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Wed, 3 Aug 2011 20:57:48 +0200
Subject: [PATCH 235/600] winsys/radeon: fix space checking

We should remove the relocations which caused a validation failure
from the list, so that the kernel receives only the validated ones.

NOTE: This is a candidate for the 7.11 branch.
---
 src/gallium/drivers/r300/r300_emit.c          |  3 +-
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 33 +++++++++++++++++--
 src/gallium/winsys/radeon/drm/radeon_drm_cs.h |  1 +
 src/gallium/winsys/radeon/drm/radeon_winsys.h |  4 ++-
 4 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 502aed3a20c..b953bd10f43 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -1238,13 +1238,12 @@ validate:
         r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf,
                                 r300_resource(index_buffer)->domain, 0);
 
-    /* Now do the validation. */
+    /* Now do the validation (flush is called inside cs_validate on failure). */
     if (!r300->rws->cs_validate(r300->cs)) {
         /* Ooops, an infinite loop, give up. */
         if (flushed)
             return FALSE;
 
-        r300_flush(&r300->context, RADEON_FLUSH_ASYNC, NULL);
         flushed = TRUE;
         goto validate;
     }
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index f0f4a70be3f..fec660d4cc8 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -115,6 +115,7 @@ static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
     }
 
     csc->crelocs = 0;
+    csc->validated_crelocs = 0;
     csc->chunks[0].length_dw = 0;
     csc->chunks[1].length_dw = 0;
     csc->used_gart = 0;
@@ -307,9 +308,37 @@ static void radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
 static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
 {
     struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+    boolean status =
+        cs->csc->used_gart < cs->ws->info.gart_size * 0.8 &&
+        cs->csc->used_vram < cs->ws->info.vram_size * 0.8;
 
-    return cs->csc->used_gart < cs->ws->info.gart_size * 0.8 &&
-           cs->csc->used_vram < cs->ws->info.vram_size * 0.8;
+    if (status) {
+        cs->csc->validated_crelocs = cs->csc->crelocs;
+    } else {
+        /* Remove lately-added relocations. The validation failed with them
+         * and the CS is about to be flushed because of that. Keep only
+         * the already-validated relocations. */
+        unsigned i;
+
+        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
+            p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references);
+            radeon_bo_reference(&cs->csc->relocs_bo[i], NULL);
+        }
+        cs->csc->crelocs = cs->csc->validated_crelocs;
+
+        /* Flush if there are any relocs. Clean up otherwise. */
+        if (cs->csc->crelocs) {
+            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
+        } else {
+            radeon_cs_context_cleanup(cs->csc);
+
+            assert(cs->base.cdw == 0);
+            if (cs->base.cdw != 0) {
+                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
+            }
+        }
+    }
+    return status;
 }
 
 static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
index ea2a820b30a..fe285326884 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -41,6 +41,7 @@ struct radeon_cs_context {
     /* Relocs. */
     unsigned                    nrelocs;
     unsigned                    crelocs;
+    unsigned			validated_crelocs;
     struct radeon_bo            **relocs_bo;
     struct drm_radeon_cs_reloc  *relocs;
 
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index f8a4d3abd43..6d52dc25022 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -262,7 +262,9 @@ struct radeon_winsys {
 
     /**
      * Return TRUE if there is enough memory in VRAM and GTT for the relocs
-     * added so far.
+     * added so far. If the validation fails, all the relocations which have
+     * been added since the last call of cs_validate will be removed and
+     * the CS will be flushed (provided there are still any relocations).
      *
      * \param cs        A command stream to validate.
      */

From 5b005ecc2b624a0ffb577ab760abacf069694f8d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Wed, 3 Aug 2011 21:01:31 +0200
Subject: [PATCH 236/600] winsys/radeon: do the CS cleanup in the CS ioctl
 thread

---
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index fec660d4cc8..1b30b95a318 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -380,6 +380,8 @@ static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param)
 
     for (i = 0; i < csc->crelocs; i++)
         p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);
+
+    radeon_cs_context_cleanup(csc);
     return NULL;
 }
 
@@ -424,6 +426,8 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
         } else {
             radeon_drm_cs_emit_ioctl(cs->csc);
         }
+    } else {
+        radeon_cs_context_cleanup(cs->csc);
     }
 
     /* Flip command streams. */
@@ -432,8 +436,6 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
     cs->cst = tmp;
 
     /* Prepare a new CS. */
-    radeon_cs_context_cleanup(cs->csc);
-
     cs->base.buf = cs->csc->buf;
     cs->base.cdw = 0;
 }

From 0722edc59cd526437c2d4bad474b934dad84d789 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 28 Jul 2011 09:57:19 -0700
Subject: [PATCH 237/600] i965/fs: Don't allocate the old backend's compile
 structs for our compile.

This saves some 35MB when the program only uses GLSL shaders.
---
 src/mesa/drivers/dri/i965/brw_wm.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index b0dfdd536aa..d13ac6124c8 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -206,10 +206,6 @@ bool do_wm_prog(struct brw_context *brw,
           */
          return false;
       }
-      c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN);
-      c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN);
-      c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG);
-      c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF);
    } else {
       void *instruction = c->instruction;
       void *prog_instructions = c->prog_instructions;
@@ -232,6 +228,13 @@ bool do_wm_prog(struct brw_context *brw,
       if (!brw_wm_fs_emit(brw, c, prog))
 	 return false;
    } else {
+      if (!c->instruction) {
+	 c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN);
+	 c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN);
+	 c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG);
+	 c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF);
+      }
+
       /* Fallback for fixed function and ARB_fp shaders. */
       c->dispatch_width = 16;
       brw_wm_payload_setup(brw, c);

From ee0373b833155804bb8846c6f05f897b9ee5afa6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 25 Jul 2011 18:13:04 -0700
Subject: [PATCH 238/600] i965/fs: Don't upload unused uniform components.

This saves both register space and upload bandwidth for unused values.

Note that previously we were relying on the visitor not initially
generating references to different sets of uniforms between the 8-wide
and 16-wide code generation, and now we're relying on them dead-code
eliminating the same stuff, too.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 89 +++++++++++++++++++++++++++-
 src/mesa/drivers/dri/i965/brw_fs.h   | 10 +++-
 2 files changed, 95 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 02041b3bc03..f55be022f72 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -242,11 +242,12 @@ import_uniforms_callback(const void *key,
  * This brings in those uniform definitions
  */
 void
-fs_visitor::import_uniforms(struct hash_table *src_variable_ht)
+fs_visitor::import_uniforms(fs_visitor *v)
 {
-   hash_table_call_foreach(src_variable_ht,
+   hash_table_call_foreach(v->variable_ht,
 			   import_uniforms_callback,
 			   variable_ht);
+   this->params_remap = v->params_remap;
 }
 
 /* Our support for uniforms is piggy-backed on the struct
@@ -798,6 +799,86 @@ fs_visitor::split_virtual_grfs()
    this->live_intervals_valid = false;
 }
 
+bool
+fs_visitor::remove_dead_constants()
+{
+   if (c->dispatch_width == 8) {
+      this->params_remap = ralloc_array(mem_ctx, int, c->prog_data.nr_params);
+
+      for (unsigned int i = 0; i < c->prog_data.nr_params; i++)
+	 this->params_remap[i] = -1;
+
+      /* Find which params are still in use. */
+      foreach_list(node, &this->instructions) {
+	 fs_inst *inst = (fs_inst *)node;
+
+	 for (int i = 0; i < 3; i++) {
+	    int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+
+	    if (inst->src[i].file != UNIFORM)
+	       continue;
+
+	    assert(constant_nr < (int)c->prog_data.nr_params);
+
+	    /* For now, set this to non-negative.  We'll give it the
+	     * actual new number in a moment, in order to keep the
+	     * register numbers nicely ordered.
+	     */
+	    this->params_remap[constant_nr] = 0;
+	 }
+      }
+
+      /* Figure out what the new numbers for the params will be.  At some
+       * point when we're doing uniform array access, we're going to want
+       * to keep the distinction between .reg and .reg_offset, but for
+       * now we don't care.
+       */
+      unsigned int new_nr_params = 0;
+      for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
+	 if (this->params_remap[i] != -1) {
+	    this->params_remap[i] = new_nr_params++;
+	 }
+      }
+
+      /* Update the list of params to be uploaded to match our new numbering. */
+      for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
+	 int remapped = this->params_remap[i];
+
+	 if (remapped == -1)
+	    continue;
+
+	 /* We've already done setup_paramvalues_refs() so no need to worry
+	  * about param_index and param_offset.
+	  */
+	 c->prog_data.param[remapped] = c->prog_data.param[i];
+	 c->prog_data.param_convert[remapped] = c->prog_data.param_convert[i];
+      }
+
+      c->prog_data.nr_params = new_nr_params;
+   } else {
+      /* This should have been generated in the 8-wide pass already. */
+      assert(this->params_remap);
+   }
+
+   /* Now do the renumbering of the shader to remove unused params. */
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
+
+      for (int i = 0; i < 3; i++) {
+	 int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+
+	 if (inst->src[i].file != UNIFORM)
+	    continue;
+
+	 assert(this->params_remap[constant_nr] != -1);
+	 inst->src[i].hw_reg = this->params_remap[constant_nr];
+	 inst->src[i].reg_offset = 0;
+      }
+   }
+
+   return true;
+}
+
 /**
  * Choose accesses from the UNIFORM file to demote to using the pull
  * constant buffer.
@@ -1624,6 +1705,8 @@ fs_visitor::run()
 	 progress = dead_code_eliminate() || progress;
       } while (progress);
 
+      remove_dead_constants();
+
       schedule_instructions();
 
       assign_curb_setup();
@@ -1702,7 +1785,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
    if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) {
       c->dispatch_width = 16;
       fs_visitor v2(c, prog, shader);
-      v2.import_uniforms(v.variable_ht);
+      v2.import_uniforms(&v);
       v2.run();
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 89d6cda7e4f..96e1420038f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -421,7 +421,7 @@ public:
 
    fs_reg *variable_storage(ir_variable *var);
    int virtual_grf_alloc(int size);
-   void import_uniforms(struct hash_table *src_variable_ht);
+   void import_uniforms(fs_visitor *v);
 
    void visit(ir_variable *ir);
    void visit(ir_assignment *ir);
@@ -489,6 +489,7 @@ public:
    bool register_coalesce();
    bool compute_to_mrf();
    bool dead_code_eliminate();
+   bool remove_dead_constants();
    bool remove_duplicate_mrf_writes();
    bool virtual_grf_interferes(int a, int b);
    void schedule_instructions();
@@ -566,6 +567,13 @@ public:
    int *virtual_grf_use;
    bool live_intervals_valid;
 
+   /* This is the map from UNIFORM hw_reg + reg_offset as generated by
+    * the visitor to the packed uniform number after
+    * remove_dead_constants() that represents the actual uploaded
+    * uniform index.
+    */
+   int *params_remap;
+
    struct hash_table *variable_ht;
    ir_variable *frag_color, *frag_data, *frag_depth;
    int first_non_payload_grf;

From 69dc529da241747888efefdf0d3e58479dd6248c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 28 Jul 2011 09:52:03 -0700
Subject: [PATCH 239/600] mesa: Remove dead "MemPool" field of gl_shader_state.

---
 src/mesa/main/mtypes.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index b88118366b2..2d5f44c1e7b 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2252,8 +2252,6 @@ struct gl_shader_state
     */
    struct gl_shader_program *ActiveProgram;
 
-   void *MemPool;
-
    GLbitfield Flags;                    /**< Mask of GLSL_x flags */
 };
 

From 9998df36c271810ecf20041bf6bed28f3952a94f Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 25 Jul 2011 18:15:25 -0700
Subject: [PATCH 240/600] i965: Add dumping for gen6 WM constants too.

This looks just like the VS dump for now.
---
 src/mesa/drivers/dri/i965/brw_context.h    |  1 +
 src/mesa/drivers/dri/i965/brw_state_dump.c | 20 ++++++++++++++++++++
 src/mesa/drivers/dri/i965/gen6_wm_state.c  |  2 +-
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 471015cf9d0..22baf978ad4 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -212,6 +212,7 @@ enum state_struct_type {
    AUB_TRACE_BINDING_TABLE =		0x101,
    AUB_TRACE_SURFACE_STATE =		0x102,
    AUB_TRACE_VS_CONSTANTS =		0x103,
+   AUB_TRACE_WM_CONSTANTS =		0x104,
 };
 
 /** Subclass of Mesa vertex program */
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c
index b9e5cc1a534..cb7a3ef73d3 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -455,6 +455,23 @@ dump_vs_constants(struct brw_context *brw, uint32_t offset, uint32_t size)
    }
 }
 
+static void
+dump_wm_constants(struct brw_context *brw, uint32_t offset, uint32_t size)
+{
+   const char *name = "WM_CONST";
+   struct intel_context *intel = &brw->intel;
+   uint32_t *as_uint = intel->batch.bo->virtual + offset;
+   float *as_float = intel->batch.bo->virtual + offset;
+   int i;
+
+   for (i = 0; i < size / 4; i += 4) {
+      batch_out(brw, name, offset, i, "%3d: (% f % f % f % f) (0x%08x 0x%08x 0x%08x 0x%08x)\n",
+		i / 4,
+		as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3],
+		as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]);
+   }
+}
+
 static void dump_binding_table(struct brw_context *brw, uint32_t offset,
 			       uint32_t size)
 {
@@ -602,6 +619,9 @@ dump_state_batch(struct brw_context *brw)
       case AUB_TRACE_VS_CONSTANTS:
 	 dump_vs_constants(brw, offset, size);
 	 break;
+      case AUB_TRACE_WM_CONSTANTS:
+	 dump_wm_constants(brw, offset, size);
+	 break;
       default:
 	 break;
       }
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 185da9c355f..3d525248f25 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -54,7 +54,7 @@ gen6_prepare_wm_push_constants(struct brw_context *brw)
       float *constants;
       unsigned int i;
 
-      constants = brw_state_batch(brw, AUB_TRACE_NO_TYPE,
+      constants = brw_state_batch(brw, AUB_TRACE_WM_CONSTANTS,
 				  brw->wm.prog_data->nr_params *
 				  sizeof(float),
 				  32, &brw->wm.push_const_offset);

From 6bd5f43f212962a054a41290b0f8e350dae2f40d Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 22 Jul 2011 15:13:08 -0700
Subject: [PATCH 241/600] prog_optimize: Add support for saturates to
 _mesa_merge_mov_into_inst.

This fixes the remaining regression from ff_fragment_shader in Mesa IR
instruction count, to now being a 1.9% win overall.
---
 src/mesa/program/prog_optimize.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c
index f4a7a638d5f..3340ce0498b 100644
--- a/src/mesa/program/prog_optimize.c
+++ b/src/mesa/program/prog_optimize.c
@@ -472,8 +472,7 @@ can_downward_mov_be_modifed(const struct prog_instruction *mov)
       mov->SrcReg[0].HasIndex2 == 0 &&
       mov->SrcReg[0].RelAddr2 == 0 &&
       mov->DstReg.RelAddr == 0 &&
-      mov->DstReg.CondMask == COND_TR &&
-      mov->SaturateMode == SATURATE_OFF;
+      mov->DstReg.CondMask == COND_TR;
 }
 
 
@@ -482,7 +481,8 @@ can_upward_mov_be_modifed(const struct prog_instruction *mov)
 {
    return
       can_downward_mov_be_modifed(mov) &&
-      mov->DstReg.File == PROGRAM_TEMPORARY;
+      mov->DstReg.File == PROGRAM_TEMPORARY &&
+      mov->SaturateMode == SATURATE_OFF;
 }
 
 
@@ -657,6 +657,8 @@ _mesa_merge_mov_into_inst(struct prog_instruction *inst,
    if (mask != (inst->DstReg.WriteMask & mask))
       return GL_FALSE;
 
+   inst->SaturateMode |= mov->SaturateMode;
+
    /* Depending on the instruction, we may need to recompute the swizzles.
     * Also, some other instructions (like TEX) are not linear. We will only
     * consider completely active sources and destinations

From 62722d90af9d43d889af33b080a682f2004e049c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 22 Jul 2011 13:54:15 -0700
Subject: [PATCH 242/600] ir_to_mesa: Try to avoid emitting a MOV_SAT to
 saturate an expression tree.

Fixes a regression in codegen quality for ff_fragment_shader
conversion to GLSL -- glean texCombine produces 7.5% fewer Mesa IR
instructions.
---
 src/mesa/program/ir_to_mesa.cpp | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index debadb9a398..9b615b68a23 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -915,10 +915,30 @@ ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
    sat_src->accept(this);
    src_reg src = this->result;
 
-   this->result = get_temp(ir->type);
-   ir_to_mesa_instruction *inst;
-   inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src);
-   inst->saturate = true;
+   /* If we generated an expression instruction into a temporary in
+    * processing the saturate's operand, apply the saturate to that
+    * instruction.  Otherwise, generate a MOV to do the saturate.
+    *
+    * Note that we have to be careful to only do this optimization if
+    * the instruction in question was what generated src->result.  For
+    * example, ir_dereference_array might generate a MUL instruction
+    * to create the reladdr, and return us a src reg using that
+    * reladdr.  That MUL result is not the value we're trying to
+    * saturate.
+    */
+   ir_expression *sat_src_expr = sat_src->as_expression();
+   ir_to_mesa_instruction *new_inst;
+   new_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
+   if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
+			sat_src_expr->operation == ir_binop_add ||
+			sat_src_expr->operation == ir_binop_dot)) {
+      new_inst->saturate = true;
+   } else {
+      this->result = get_temp(ir->type);
+      ir_to_mesa_instruction *inst;
+      inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src);
+      inst->saturate = true;
+   }
 
    return true;
 }

From fbc2fcf685d22ec9bc9465e1f731529979497eaa Mon Sep 17 00:00:00 2001
From: Christopher James Halse Rogers <christopher.halse.rogers@canonical.com>
Date: Thu, 4 Aug 2011 12:06:13 +1000
Subject: [PATCH 243/600] glx/dri2: Paper over errors in DRI2Connect when
 indirect

DRI2 will throw BadRequest for this when the client is not local, but
DRI2 is an implementation detail and not something callers should have
to know about.  Silently swallow errors in this case, and just propagate
the failure through DRI2Connect's return code.

Note: This is a candidate for the stable release branches.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=28125
Signed-off-by: Christopher James Halse Rogers <christopher.halse.rogers@canonical.com>
---
 src/glx/dri2.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/glx/dri2.c b/src/glx/dri2.c
index 229840d6919..b1b5013d048 100644
--- a/src/glx/dri2.c
+++ b/src/glx/dri2.c
@@ -190,6 +190,15 @@ DRI2Error(Display *display, xError *err, XExtCodes *codes, int *ret_code)
 	err->minorCode == X_DRI2DestroyDrawable)
 	return True;
 
+    /* If the server is non-local DRI2Connect will raise BadRequest.
+     * Swallow this so that DRI2Connect can signal this in its return code */
+    if (err->majorCode == codes->major_opcode &&
+        err->minorCode == X_DRI2Connect &&
+        err->errorCode == BadRequest) {
+	*ret_code = False;
+	return True;
+    }
+
     return False;
 }
 

From 4c7e215c7bb09f827df630cbfc80e87869351f18 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 2 May 2011 16:27:46 -0700
Subject: [PATCH 244/600] ir_to_mesa: Replace open-coded swizzle_for_size()

---
 src/mesa/program/ir_to_mesa.cpp | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 9b615b68a23..1ef609fe15d 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -641,8 +641,6 @@ src_reg
 ir_to_mesa_visitor::get_temp(const glsl_type *type)
 {
    src_reg src;
-   int swizzle[4];
-   int i;
 
    src.file = PROGRAM_TEMPORARY;
    src.index = next_temp;
@@ -652,12 +650,7 @@ ir_to_mesa_visitor::get_temp(const glsl_type *type)
    if (type->is_array() || type->is_record()) {
       src.swizzle = SWIZZLE_NOOP;
    } else {
-      for (i = 0; i < type->vector_elements; i++)
-	 swizzle[i] = i;
-      for (; i < 4; i++)
-	 swizzle[i] = type->vector_elements - 1;
-      src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
-				  swizzle[2], swizzle[3]);
+      src.swizzle = swizzle_for_size(type->vector_elements);
    }
    src.negate = 0;
 

From b44648c9186d403abaeeeb3190d6759f951a49e4 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Fri, 5 Aug 2011 14:09:37 -0500
Subject: [PATCH 245/600] glsl_to_tgsi: try to avoid emitting a MOV_SAT to
 saturate an expression tree

This is a port of commit 62722d9 to glsl_to_tgsi, with minor aesthetic
changes (moved the declaration and assignment of new_inst inside the if block).
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 32 ++++++++++++++++++----
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 460bafb3821..e10243add8a 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1232,12 +1232,32 @@ glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
    sat_src->accept(this);
    st_src_reg src = this->result;
 
-   this->result = get_temp(ir->type);
-   st_dst_reg result_dst = st_dst_reg(this->result);
-   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
-   glsl_to_tgsi_instruction *inst;
-   inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
-   inst->saturate = true;
+   /* If we generated an expression instruction into a temporary in
+    * processing the saturate's operand, apply the saturate to that
+    * instruction.  Otherwise, generate a MOV to do the saturate.
+    *
+    * Note that we have to be careful to only do this optimization if
+    * the instruction in question was what generated src->result.  For
+    * example, ir_dereference_array might generate a MUL instruction
+    * to create the reladdr, and return us a src reg using that
+    * reladdr.  That MUL result is not the value we're trying to
+    * saturate.
+    */
+   ir_expression *sat_src_expr = sat_src->as_expression();
+   if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
+			sat_src_expr->operation == ir_binop_add ||
+			sat_src_expr->operation == ir_binop_dot)) {
+      glsl_to_tgsi_instruction *new_inst;
+      new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+      new_inst->saturate = true;
+   } else {
+      this->result = get_temp(ir->type);
+      st_dst_reg result_dst = st_dst_reg(this->result);
+      result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+      glsl_to_tgsi_instruction *inst;
+      inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
+      inst->saturate = true;
+   }
 
    return true;
 }

From 5164244df02f33d6ad9e0a286f4b6d6af2dfbc75 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Fri, 5 Aug 2011 14:37:33 -0500
Subject: [PATCH 246/600] glsl_to_tgsi: replace open-coded swizzle_for_size()

This is a port of commit 4c7e215c7bb to glsl_to_tgsi.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index e10243add8a..d7a1ba80e1d 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -949,8 +949,6 @@ st_src_reg
 glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
 {
    st_src_reg src;
-   int swizzle[4];
-   int i;
 
    src.type = glsl_version >= 130 ? type->base_type : GLSL_TYPE_FLOAT;
    src.file = PROGRAM_TEMPORARY;
@@ -961,12 +959,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
    if (type->is_array() || type->is_record()) {
       src.swizzle = SWIZZLE_NOOP;
    } else {
-      for (i = 0; i < type->vector_elements; i++)
-         swizzle[i] = i;
-      for (; i < 4; i++)
-         swizzle[i] = type->vector_elements - 1;
-      src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
-        			  swizzle[2], swizzle[3]);
+      src.swizzle = swizzle_for_size(type->vector_elements);
    }
    src.negate = 0;
 

From a9e97d022cb68266639eb54947517454c8ffe45e Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 12:47:25 -0700
Subject: [PATCH 247/600] intel: Fix warnings from gl_constant_parameter
 changes.

---
 src/mesa/drivers/dri/i915/i915_fragprog.c |  6 ++----
 src/mesa/drivers/dri/i965/brw_wm_fp.c     | 10 +++++-----
 src/mesa/drivers/dri/i965/brw_wm_pass0.c  |  4 ++--
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c
index 32050cebf33..d155b85ffca 100644
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -175,10 +175,8 @@ src_vector(struct i915_fragment_program *p,
    case PROGRAM_STATE_VAR:
    case PROGRAM_NAMED_PARAM:
    case PROGRAM_UNIFORM:
-      src =
-         i915_emit_param4fv(p,
-                            program->Base.Parameters->ParameterValues[source->
-                                                                      Index]);
+      src = i915_emit_param4fv(p,
+	 &program->Base.Parameters->ParameterValues[source->Index][0].f);
       break;
 
    default:
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index 7cd3edad235..d52a9581f5e 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -535,15 +535,15 @@ static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c,
 						     GLfloat s3)
 {
    struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
-   GLfloat values[4];
+   gl_constant_value values[4];
    GLuint idx;
    GLuint swizzle;
    struct prog_src_register reg;
 
-   values[0] = s0;
-   values[1] = s1;
-   values[2] = s2;
-   values[3] = s3;
+   values[0].f = s0;
+   values[1].f = s1;
+   values[2].f = s2;
+   values[3].f = s3;
 
    idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
    reg = src_reg(PROGRAM_STATE_VAR, idx);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
index f78bdc31866..ccf9dc2bc18 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
@@ -205,14 +205,14 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
 	 case PROGRAM_CONSTANT:
 	    /* These are invarient:
 	     */
-	    ref = get_const_ref(c, &plist->ParameterValues[idx][component]);
+	    ref = get_const_ref(c, &plist->ParameterValues[idx][component].f);
 	    break;
 
 	 case PROGRAM_STATE_VAR:
 	 case PROGRAM_UNIFORM:
 	    /* These may change from run to run:
 	     */
-	    ref = get_param_ref(c, &plist->ParameterValues[idx][component] );
+	    ref = get_param_ref(c, &plist->ParameterValues[idx][component].f );
 	    break;
 
 	 default:

From db726b048e8858af226dbd0f0fda72d0be01394e Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Sat, 30 Jul 2011 21:26:26 -0700
Subject: [PATCH 248/600] mesa: In validate_program(), initialize errMsg for
 safety.

validate_program relies on validate_shader_program to fill in errMsg;
empirically, there exist cases where that doesn't happen.

While tracking those down may be worthwhile, initializing the string so
we don't try to ralloc_strdup random garbage also seems wise.

Fixes issues caught by valgrind while running some test case.

NOTE: This is a candidate for stable release branches.

Reviewed-by: Chad Versace <chad@chad-versace.us>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/main/shaderapi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 8df25c3f988..74997eaaa77 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -1125,7 +1125,7 @@ static void
 validate_program(struct gl_context *ctx, GLuint program)
 {
    struct gl_shader_program *shProg;
-   char errMsg[100];
+   char errMsg[100] = "";
 
    shProg = _mesa_lookup_shader_program_err(ctx, program, "glValidateProgram");
    if (!shProg) {

From 1554e69e00566bc7255b82f5ea93b1f02f1a5bb3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Sat, 6 Aug 2011 05:15:30 +0200
Subject: [PATCH 249/600] winsys/radeon: disable use of the buffer
 busy-for-write flag

---
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 2eb9d134407..609a9065db8 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -192,6 +192,17 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf,
                 if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
                     cs->flush_cs(cs->flush_data, 0);
                     radeon_bo_wait((struct pb_buffer*)bo);
+                } else {
+                    /* XXX We could check whether the buffer is busy for write here. */
+                    radeon_bo_wait((struct pb_buffer*)bo);
+                }
+#if 0
+                /* XXX This per-winsys busy-for-write tracking sucks.
+                 * What if some other process wrote something, e.g. using
+                 * DRI2CopyRegion? We wouldn't get the busy_for_write flag
+                 * set, skipping bo_wait.
+                 * We need to move the is-busy-for-write query into the kernel.
+                 */
                 } else if (bo->busy_for_write) {
                     /* Update the busy_for_write field (done by radeon_bo_is_busy)
                      * and wait if needed. */
@@ -199,6 +210,7 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf,
                         radeon_bo_wait((struct pb_buffer*)bo);
                     }
                 }
+#endif
             } else {
                 /* Mapping for write. */
                 if (radeon_bo_is_referenced_by_cs(cs, bo)) {

From 425b179fafe93ddf4abacbccb67ed6aecbef6a7e Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Fri, 5 Aug 2011 20:10:04 +0200
Subject: [PATCH 250/600] st/mesa: don't resolve stencil twice

---
 src/mesa/state_tracker/st_cb_blit.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c
index 626db12431d..750f541b5dd 100644
--- a/src/mesa/state_tracker/st_cb_blit.c
+++ b/src/mesa/state_tracker/st_cb_blit.c
@@ -107,8 +107,10 @@ st_BlitFramebuffer_resolve(struct gl_context *ctx,
          dstRb = st_renderbuffer(dstDepth->Renderbuffer);
 
          info->mask = (mask & GL_DEPTH_BUFFER_BIT) ? PIPE_MASK_Z : 0;
-         if (combined && (mask & GL_STENCIL_BUFFER_BIT))
+         if (combined && (mask & GL_STENCIL_BUFFER_BIT)) {
+            mask &= ~GL_STENCIL_BUFFER_BIT;
             info->mask |= PIPE_MASK_S;
+         }
 
          info->src.res = srcRb->texture;
          info->src.layer = srcRb->surface->u.tex.first_layer;

From 9e466e87e6fde23f8ec0923be86005be81ac2d24 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Mon, 25 Jul 2011 18:13:26 +0200
Subject: [PATCH 251/600] nv50,nvc0: never convert in resource copy when format
 sizes match

If there are any cases left where the st thinks that RGBA -> BGRA
will swap components, it will get what it deserves.

Now the GPU's 2D engine goes unused. What a shame.
---
 src/gallium/drivers/nv50/nv50_surface.c | 9 ++++++++-
 src/gallium/drivers/nvc0/nvc0_surface.c | 9 ++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index 1a5077e970b..8bca900e1ff 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -198,6 +198,7 @@ nv50_resource_copy_region(struct pipe_context *pipe,
 {
    struct nv50_screen *screen = nv50_context(pipe)->screen;
    int ret;
+   boolean m2mf;
    unsigned dst_layer = dstz, src_layer = src_box->z;
 
    /* Fallback for buffers. */
@@ -207,9 +208,15 @@ nv50_resource_copy_region(struct pipe_context *pipe,
       return;
    }
 
+   assert(src->nr_samples == dst->nr_samples);
+
+   m2mf = (src->format == dst->format) ||
+      (util_format_get_blocksizebits(src->format) ==
+       util_format_get_blocksizebits(dst->format));
+
    nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
 
-   if (src->format == dst->format && src->nr_samples == dst->nr_samples) {
+   if (m2mf) {
       struct nv50_m2mf_rect drect, srect;
       unsigned i;
       unsigned nx = util_format_get_nblocksx(src->format, src_box->width);
diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c
index 67bba3c6cc3..a4fd17e5324 100644
--- a/src/gallium/drivers/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nvc0/nvc0_surface.c
@@ -205,6 +205,7 @@ nvc0_resource_copy_region(struct pipe_context *pipe,
 {
    struct nvc0_screen *screen = nvc0_context(pipe)->screen;
    int ret;
+   boolean m2mf;
    unsigned dst_layer = dstz, src_layer = src_box->z;
 
    /* Fallback for buffers. */
@@ -214,9 +215,15 @@ nvc0_resource_copy_region(struct pipe_context *pipe,
       return;
    }
 
+   assert(src->nr_samples == dst->nr_samples);
+
+   m2mf = (src->format == dst->format) ||
+      (util_format_get_blocksizebits(src->format) ==
+       util_format_get_blocksizebits(dst->format));
+
    nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
 
-   if (src->format == dst->format && src->nr_samples == dst->nr_samples) {
+   if (m2mf) {
       struct nv50_m2mf_rect drect, srect;
       unsigned i;
       unsigned nx = util_format_get_nblocksx(src->format, src_box->width);

From 4dd3272df9f6d483cb3734c3b8c77e9c190b3773 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sun, 7 Aug 2011 15:34:07 +0200
Subject: [PATCH 252/600] d3d1x: adapt to resource_resolve interface change

---
 .../d3d1x/gd3d11/d3d11_context.h              | 23 ++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h
index 12f2aaddc91..aedf82a4381 100644
--- a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h
+++ b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h
@@ -1726,9 +1726,26 @@ changed:
 		SYNCHRONIZED;
 		GalliumD3D11Resource<>* dst = (GalliumD3D11Resource<>*)dst_resource;
 		GalliumD3D11Resource<>* src = (GalliumD3D11Resource<>*)src_resource;
-		unsigned dst_layer = d3d11_subresource_to_face(dst->resource, dst_subresource);
-		unsigned src_layer = d3d11_subresource_to_face(src->resource, src_subresource);
-		pipe->resource_resolve(pipe, dst->resource, dst_layer, src->resource, src_layer);
+		struct pipe_resolve_info info;
+
+		info.dst.res = dst->resource;
+		info.src.res = src->resource;
+		info.dst.level = 0;
+		info.dst.layer = d3d11_subresource_to_face(dst->resource, dst_subresource);
+		info.src.layer = d3d11_subresource_to_face(src->resource, src_subresource);
+
+		info.src.x0 = 0;
+		info.src.x1 = info.src.res->width0;
+		info.src.y0 = 0;
+		info.src.y1 = info.src.res->height0;
+		info.dst.x0 = 0;
+		info.dst.x1 = info.dst.res->width0;
+		info.dst.y0 = 0;
+		info.dst.y1 = info.dst.res->height0;
+
+		info.mask = PIPE_MASK_RGBA | PIPE_MASK_ZS;
+
+		pipe->resource_resolve(pipe, &info);
 	}
 
 #if API >= 11

From 8488112d20d49d3dc7fefef19c6e550e4b71661c Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 5 Aug 2011 15:01:41 -0600
Subject: [PATCH 253/600] mesa: whitespace changes

---
 src/mesa/program/prog_parameter.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h
index f858cf0fa0d..1a5ed343937 100644
--- a/src/mesa/program/prog_parameter.h
+++ b/src/mesa/program/prog_parameter.h
@@ -46,16 +46,19 @@
 #define PROG_PARAM_BIT_CYL_WRAP  0x10  /**< XXX gallium debug */
 /*@}*/
 
+
 /**
  * Actual data for constant values of parameters.
  */
-typedef union gl_constant_value {
-	GLfloat f;
-	GLboolean b;
-	GLint i;
-	GLuint u;
+typedef union gl_constant_value
+{
+   GLfloat f;
+   GLboolean b;
+   GLint i;
+   GLuint u;
 } gl_constant_value;
 
+
 /**
  * Program parameter.
  * Used by shaders/programs for uniforms, constants, varying vars, etc.

From 7d4d8a8de7c5877108040fa692f2914452b10789 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 8 Aug 2011 09:00:06 -0600
Subject: [PATCH 254/600] gallium: silence warnings about trailing commas in
 enum lists

---
 src/gallium/include/pipe/p_defines.h     | 2 +-
 src/gallium/include/pipe/p_video_enums.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 1ef3ae71e76..795de1fbf62 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -495,7 +495,7 @@ enum pipe_shader_cap
    PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR = 14,
    PIPE_SHADER_CAP_INDIRECT_CONST_ADDR = 15,
    PIPE_SHADER_CAP_SUBROUTINES = 16, /* BGNSUB, ENDSUB, CAL, RET */
-   PIPE_SHADER_CAP_INTEGERS = 17,
+   PIPE_SHADER_CAP_INTEGERS = 17
 };
 
 
diff --git a/src/gallium/include/pipe/p_video_enums.h b/src/gallium/include/pipe/p_video_enums.h
index 492ab84e33f..13786067d53 100644
--- a/src/gallium/include/pipe/p_video_enums.h
+++ b/src/gallium/include/pipe/p_video_enums.h
@@ -50,7 +50,7 @@ enum pipe_video_cap
    PIPE_VIDEO_CAP_SUPPORTED = 0,
    PIPE_VIDEO_CAP_NPOT_TEXTURES = 1,
    PIPE_VIDEO_CAP_MAX_WIDTH = 2,
-   PIPE_VIDEO_CAP_MAX_HEIGHT = 3,
+   PIPE_VIDEO_CAP_MAX_HEIGHT = 3
 };
 
 enum pipe_video_codec

From 75a98740215d82447e5189b36d1dbfa59fcdd5db Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 8 Aug 2011 09:00:57 -0600
Subject: [PATCH 255/600] glsl: silence warning about trailing comma in enum
 list

---
 src/glsl/ir_function.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp
index 2a4de5b0dcd..51d32b46f98 100644
--- a/src/glsl/ir_function.cpp
+++ b/src/glsl/ir_function.cpp
@@ -27,7 +27,7 @@
 typedef enum {
    PARAMETER_LIST_NO_MATCH,
    PARAMETER_LIST_EXACT_MATCH,
-   PARAMETER_LIST_INEXACT_MATCH, /*< Match requires implicit conversion. */
+   PARAMETER_LIST_INEXACT_MATCH /*< Match requires implicit conversion. */
 } parameter_list_match_t;
 
 /**

From 506de1954919e5346f382e66a7ec111af7e71a56 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 8 Aug 2011 09:01:13 -0600
Subject: [PATCH 256/600] glext: upgrade to version 72

---
 include/GL/glext.h | 367 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 364 insertions(+), 3 deletions(-)

diff --git a/include/GL/glext.h b/include/GL/glext.h
index 9048515c6d9..09400215bac 100644
--- a/include/GL/glext.h
+++ b/include/GL/glext.h
@@ -29,9 +29,9 @@ extern "C" {
 */
 
 /* Header file version number, required by OpenGL ABI for Linux */
-/* glext.h last updated $Date: 2011-07-06 02:49:14 -0700 (Wed, 06 Jul 2011) $ */
+/* glext.h last updated $Date: 2011-08-08 00:34:29 -0700 (Mon, 08 Aug 2011) $ */
 /* Current version at http://www.opengl.org/registry/ */
-#define GL_GLEXT_VERSION 71
+#define GL_GLEXT_VERSION 72
 /* Function declaration macros - to move into glplatform.h */
 
 #if defined(_WIN32) && !defined(APIENTRY) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__)
@@ -1047,6 +1047,124 @@ extern "C" {
 /* reuse GL_UNDEFINED_VERTEX */
 #endif
 
+#ifndef GL_VERSION_4_2
+/* Reuse tokens from ARB_base_instance (none) */
+/* Reuse tokens from ARB_shading_language_420pack (none) */
+/* Reuse tokens from ARB_transform_feedback_instanced (none) */
+/* Reuse tokens from ARB_compressed_texture_pixel_storage */
+/* reuse GL_UNPACK_COMPRESSED_BLOCK_WIDTH */
+/* reuse GL_UNPACK_COMPRESSED_BLOCK_HEIGHT */
+/* reuse GL_UNPACK_COMPRESSED_BLOCK_DEPTH */
+/* reuse GL_UNPACK_COMPRESSED_BLOCK_SIZE */
+/* reuse GL_PACK_COMPRESSED_BLOCK_WIDTH */
+/* reuse GL_PACK_COMPRESSED_BLOCK_HEIGHT */
+/* reuse GL_PACK_COMPRESSED_BLOCK_DEPTH */
+/* reuse GL_PACK_COMPRESSED_BLOCK_SIZE */
+/* Reuse tokens from ARB_conservative_depth (none) */
+/* Reuse tokens from ARB_internalformat_query */
+/* reuse GL_NUM_SAMPLE_COUNTS */
+/* Reuse tokens from ARB_map_buffer_alignment */
+/* reuse GL_MIN_MAP_BUFFER_ALIGNMENT */
+/* Reuse tokens from ARB_shader_atomic_counters */
+/* reuse GL_ATOMIC_COUNTER_BUFFER */
+/* reuse GL_ATOMIC_COUNTER_BUFFER_BINDING */
+/* reuse GL_ATOMIC_COUNTER_BUFFER_START */
+/* reuse GL_ATOMIC_COUNTER_BUFFER_SIZE */
+/* reuse GL_ATOMIC_COUNTER_BUFFER_DATA_SIZE */
+/* reuse GL_ATOMIC_COUNTER_BUFFER_ACTIVE_ATOMIC_COUNTERS */
+/* reuse GL_ATOMIC_COUNTER_BUFFER_ACTIVE_ATOMIC_COUNTER_INDICES */
+/* reuse GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_VERTEX_SHADER */
+/* reuse GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_CONTROL_SHADER */
+/* reuse GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_EVALUATION_SHADER */
+/* reuse GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_GEOMETRY_SHADER */
+/* reuse GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_FRAGMENT_SHADER */
+/* reuse GL_MAX_VERTEX_ATOMIC_COUNTER_BUFFERS */
+/* reuse GL_MAX_TESS_CONTROL_ATOMIC_COUNTER_BUFFERS */
+/* reuse GL_MAX_TESS_EVALUATION_ATOMIC_COUNTER_BUFFERS */
+/* reuse GL_MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS */
+/* reuse GL_MAX_FRAGMENT_ATOMIC_COUNTER_BUFFERS */
+/* reuse GL_MAX_COMBINED_ATOMIC_COUNTER_BUFFERS */
+/* reuse GL_MAX_VERTEX_ATOMIC_COUNTERS */
+/* reuse GL_MAX_TESS_CONTROL_ATOMIC_COUNTERS */
+/* reuse GL_MAX_TESS_EVALUATION_ATOMIC_COUNTERS */
+/* reuse GL_MAX_GEOMETRY_ATOMIC_COUNTERS */
+/* reuse GL_MAX_FRAGMENT_ATOMIC_COUNTERS */
+/* reuse GL_MAX_COMBINED_ATOMIC_COUNTERS */
+/* reuse GL_MAX_ATOMIC_COUNTER_BUFFER_SIZE */
+/* reuse GL_MAX_ATOMIC_COUNTER_BUFFER_BINDINGS */
+/* reuse GL_ACTIVE_ATOMIC_COUNTER_BUFFERS */
+/* reuse GL_UNIFORM_ATOMIC_COUNTER_BUFFER_INDEX */
+/* reuse GL_UNSIGNED_INT_ATOMIC_COUNTER */
+/* Reuse tokens from ARB_shader_image_load_store */
+/* reuse GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT */
+/* reuse GL_ELEMENT_ARRAY_BARRIER_BIT */
+/* reuse GL_UNIFORM_BARRIER_BIT */
+/* reuse GL_TEXTURE_FETCH_BARRIER_BIT */
+/* reuse GL_SHADER_IMAGE_ACCESS_BARRIER_BIT */
+/* reuse GL_COMMAND_BARRIER_BIT */
+/* reuse GL_PIXEL_BUFFER_BARRIER_BIT */
+/* reuse GL_TEXTURE_UPDATE_BARRIER_BIT */
+/* reuse GL_BUFFER_UPDATE_BARRIER_BIT */
+/* reuse GL_FRAMEBUFFER_BARRIER_BIT */
+/* reuse GL_TRANSFORM_FEEDBACK_BARRIER_BIT */
+/* reuse GL_ATOMIC_COUNTER_BARRIER_BIT */
+/* reuse GL_ALL_BARRIER_BITS */
+/* reuse GL_MAX_IMAGE_UNITS */
+/* reuse GL_MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS */
+/* reuse GL_IMAGE_BINDING_NAME */
+/* reuse GL_IMAGE_BINDING_LEVEL */
+/* reuse GL_IMAGE_BINDING_LAYERED */
+/* reuse GL_IMAGE_BINDING_LAYER */
+/* reuse GL_IMAGE_BINDING_ACCESS */
+/* reuse GL_IMAGE_1D */
+/* reuse GL_IMAGE_2D */
+/* reuse GL_IMAGE_3D */
+/* reuse GL_IMAGE_2D_RECT */
+/* reuse GL_IMAGE_CUBE */
+/* reuse GL_IMAGE_BUFFER */
+/* reuse GL_IMAGE_1D_ARRAY */
+/* reuse GL_IMAGE_2D_ARRAY */
+/* reuse GL_IMAGE_CUBE_MAP_ARRAY */
+/* reuse GL_IMAGE_2D_MULTISAMPLE */
+/* reuse GL_IMAGE_2D_MULTISAMPLE_ARRAY */
+/* reuse GL_INT_IMAGE_1D */
+/* reuse GL_INT_IMAGE_2D */
+/* reuse GL_INT_IMAGE_3D */
+/* reuse GL_INT_IMAGE_2D_RECT */
+/* reuse GL_INT_IMAGE_CUBE */
+/* reuse GL_INT_IMAGE_BUFFER */
+/* reuse GL_INT_IMAGE_1D_ARRAY */
+/* reuse GL_INT_IMAGE_2D_ARRAY */
+/* reuse GL_INT_IMAGE_CUBE_MAP_ARRAY */
+/* reuse GL_INT_IMAGE_2D_MULTISAMPLE */
+/* reuse GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY */
+/* reuse GL_UNSIGNED_INT_IMAGE_1D */
+/* reuse GL_UNSIGNED_INT_IMAGE_2D */
+/* reuse GL_UNSIGNED_INT_IMAGE_3D */
+/* reuse GL_UNSIGNED_INT_IMAGE_2D_RECT */
+/* reuse GL_UNSIGNED_INT_IMAGE_CUBE */
+/* reuse GL_UNSIGNED_INT_IMAGE_BUFFER */
+/* reuse GL_UNSIGNED_INT_IMAGE_1D_ARRAY */
+/* reuse GL_UNSIGNED_INT_IMAGE_2D_ARRAY */
+/* reuse GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY */
+/* reuse GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE */
+/* reuse GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY */
+/* reuse GL_MAX_IMAGE_SAMPLES */
+/* reuse GL_IMAGE_BINDING_FORMAT */
+/* reuse GL_IMAGE_FORMAT_COMPATIBILITY_TYPE */
+/* reuse GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE */
+/* reuse GL_IMAGE_FORMAT_COMPATIBILITY_BY_CLASS */
+/* reuse GL_MAX_VERTEX_IMAGE_UNIFORMS */
+/* reuse GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS */
+/* reuse GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS */
+/* reuse GL_MAX_GEOMETRY_IMAGE_UNIFORMS */
+/* reuse GL_MAX_FRAGMENT_IMAGE_UNIFORMS */
+/* reuse GL_MAX_COMBINED_IMAGE_UNIFORMS */
+/* Reuse tokens from ARB_shading_language_packing (none) */
+/* Reuse tokens from ARB_texture_storage */
+/* reuse GL_TEXTURE_IMMUTABLE_FORMAT */
+#endif
+
 #ifndef GL_ARB_multitexture
 #define GL_TEXTURE0_ARB                   0x84C0
 #define GL_TEXTURE1_ARB                   0x84C1
@@ -2140,6 +2258,143 @@ extern "C" {
 #ifndef GL_ARB_shader_stencil_export
 #endif
 
+#ifndef GL_ARB_base_instance
+#endif
+
+#ifndef GL_ARB_shading_language_420pack
+#endif
+
+#ifndef GL_ARB_transform_feedback_instanced
+#endif
+
+#ifndef GL_ARB_compressed_texture_pixel_storage
+#define GL_UNPACK_COMPRESSED_BLOCK_WIDTH  0x9127
+#define GL_UNPACK_COMPRESSED_BLOCK_HEIGHT 0x9128
+#define GL_UNPACK_COMPRESSED_BLOCK_DEPTH  0x9129
+#define GL_UNPACK_COMPRESSED_BLOCK_SIZE   0x912A
+#define GL_PACK_COMPRESSED_BLOCK_WIDTH    0x912B
+#define GL_PACK_COMPRESSED_BLOCK_HEIGHT   0x912C
+#define GL_PACK_COMPRESSED_BLOCK_DEPTH    0x912D
+#define GL_PACK_COMPRESSED_BLOCK_SIZE     0x912E
+#endif
+
+#ifndef GL_ARB_conservative_depth
+#endif
+
+#ifndef GL_ARB_internalformat_query
+#define GL_NUM_SAMPLE_COUNTS              0x9380
+#endif
+
+#ifndef GL_ARB_map_buffer_alignment
+#define GL_MIN_MAP_BUFFER_ALIGNMENT       0x90BC
+#endif
+
+#ifndef GL_ARB_shader_atomic_counters
+#define GL_ATOMIC_COUNTER_BUFFER          0x92C0
+#define GL_ATOMIC_COUNTER_BUFFER_BINDING  0x92C1
+#define GL_ATOMIC_COUNTER_BUFFER_START    0x92C2
+#define GL_ATOMIC_COUNTER_BUFFER_SIZE     0x92C3
+#define GL_ATOMIC_COUNTER_BUFFER_DATA_SIZE 0x92C4
+#define GL_ATOMIC_COUNTER_BUFFER_ACTIVE_ATOMIC_COUNTERS 0x92C5
+#define GL_ATOMIC_COUNTER_BUFFER_ACTIVE_ATOMIC_COUNTER_INDICES 0x92C6
+#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_VERTEX_SHADER 0x92C7
+#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_CONTROL_SHADER 0x92C8
+#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_EVALUATION_SHADER 0x92C9
+#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_GEOMETRY_SHADER 0x92CA
+#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_FRAGMENT_SHADER 0x92CB
+#define GL_MAX_VERTEX_ATOMIC_COUNTER_BUFFERS 0x92CC
+#define GL_MAX_TESS_CONTROL_ATOMIC_COUNTER_BUFFERS 0x92CD
+#define GL_MAX_TESS_EVALUATION_ATOMIC_COUNTER_BUFFERS 0x92CE
+#define GL_MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS 0x92CF
+#define GL_MAX_FRAGMENT_ATOMIC_COUNTER_BUFFERS 0x92D0
+#define GL_MAX_COMBINED_ATOMIC_COUNTER_BUFFERS 0x92D1
+#define GL_MAX_VERTEX_ATOMIC_COUNTERS     0x92D2
+#define GL_MAX_TESS_CONTROL_ATOMIC_COUNTERS 0x92D3
+#define GL_MAX_TESS_EVALUATION_ATOMIC_COUNTERS 0x92D4
+#define GL_MAX_GEOMETRY_ATOMIC_COUNTERS   0x92D5
+#define GL_MAX_FRAGMENT_ATOMIC_COUNTERS   0x92D6
+#define GL_MAX_COMBINED_ATOMIC_COUNTERS   0x92D7
+#define GL_MAX_ATOMIC_COUNTER_BUFFER_SIZE 0x92D8
+#define GL_MAX_ATOMIC_COUNTER_BUFFER_BINDINGS 0x92DC
+#define GL_ACTIVE_ATOMIC_COUNTER_BUFFERS  0x92D9
+#define GL_UNIFORM_ATOMIC_COUNTER_BUFFER_INDEX 0x92DA
+#define GL_UNSIGNED_INT_ATOMIC_COUNTER    0x92DB
+#endif
+
+#ifndef GL_ARB_shader_image_load_store
+#define GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT 0x00000001
+#define GL_ELEMENT_ARRAY_BARRIER_BIT      0x00000002
+#define GL_UNIFORM_BARRIER_BIT            0x00000004
+#define GL_TEXTURE_FETCH_BARRIER_BIT      0x00000008
+#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020
+#define GL_COMMAND_BARRIER_BIT            0x00000040
+#define GL_PIXEL_BUFFER_BARRIER_BIT       0x00000080
+#define GL_TEXTURE_UPDATE_BARRIER_BIT     0x00000100
+#define GL_BUFFER_UPDATE_BARRIER_BIT      0x00000200
+#define GL_FRAMEBUFFER_BARRIER_BIT        0x00000400
+#define GL_TRANSFORM_FEEDBACK_BARRIER_BIT 0x00000800
+#define GL_ATOMIC_COUNTER_BARRIER_BIT     0x00001000
+#define GL_ALL_BARRIER_BITS               0xFFFFFFFF
+#define GL_MAX_IMAGE_UNITS                0x8F38
+#define GL_MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS 0x8F39
+#define GL_IMAGE_BINDING_NAME             0x8F3A
+#define GL_IMAGE_BINDING_LEVEL            0x8F3B
+#define GL_IMAGE_BINDING_LAYERED          0x8F3C
+#define GL_IMAGE_BINDING_LAYER            0x8F3D
+#define GL_IMAGE_BINDING_ACCESS           0x8F3E
+#define GL_IMAGE_1D                       0x904C
+#define GL_IMAGE_2D                       0x904D
+#define GL_IMAGE_3D                       0x904E
+#define GL_IMAGE_2D_RECT                  0x904F
+#define GL_IMAGE_CUBE                     0x9050
+#define GL_IMAGE_BUFFER                   0x9051
+#define GL_IMAGE_1D_ARRAY                 0x9052
+#define GL_IMAGE_2D_ARRAY                 0x9053
+#define GL_IMAGE_CUBE_MAP_ARRAY           0x9054
+#define GL_IMAGE_2D_MULTISAMPLE           0x9055
+#define GL_IMAGE_2D_MULTISAMPLE_ARRAY     0x9056
+#define GL_INT_IMAGE_1D                   0x9057
+#define GL_INT_IMAGE_2D                   0x9058
+#define GL_INT_IMAGE_3D                   0x9059
+#define GL_INT_IMAGE_2D_RECT              0x905A
+#define GL_INT_IMAGE_CUBE                 0x905B
+#define GL_INT_IMAGE_BUFFER               0x905C
+#define GL_INT_IMAGE_1D_ARRAY             0x905D
+#define GL_INT_IMAGE_2D_ARRAY             0x905E
+#define GL_INT_IMAGE_CUBE_MAP_ARRAY       0x905F
+#define GL_INT_IMAGE_2D_MULTISAMPLE       0x9060
+#define GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x9061
+#define GL_UNSIGNED_INT_IMAGE_1D          0x9062
+#define GL_UNSIGNED_INT_IMAGE_2D          0x9063
+#define GL_UNSIGNED_INT_IMAGE_3D          0x9064
+#define GL_UNSIGNED_INT_IMAGE_2D_RECT     0x9065
+#define GL_UNSIGNED_INT_IMAGE_CUBE        0x9066
+#define GL_UNSIGNED_INT_IMAGE_BUFFER      0x9067
+#define GL_UNSIGNED_INT_IMAGE_1D_ARRAY    0x9068
+#define GL_UNSIGNED_INT_IMAGE_2D_ARRAY    0x9069
+#define GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY 0x906A
+#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE 0x906B
+#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x906C
+#define GL_MAX_IMAGE_SAMPLES              0x906D
+#define GL_IMAGE_BINDING_FORMAT           0x906E
+#define GL_IMAGE_FORMAT_COMPATIBILITY_TYPE 0x90C7
+#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE 0x90C8
+#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_CLASS 0x90C9
+#define GL_MAX_VERTEX_IMAGE_UNIFORMS      0x90CA
+#define GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS 0x90CB
+#define GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS 0x90CC
+#define GL_MAX_GEOMETRY_IMAGE_UNIFORMS    0x90CD
+#define GL_MAX_FRAGMENT_IMAGE_UNIFORMS    0x90CE
+#define GL_MAX_COMBINED_IMAGE_UNIFORMS    0x90CF
+#endif
+
+#ifndef GL_ARB_shading_language_packing
+#endif
+
+#ifndef GL_ARB_texture_storage
+#define GL_TEXTURE_IMMUTABLE_FORMAT       0x912F
+#endif
+
 #ifndef GL_EXT_abgr
 #define GL_ABGR_EXT                       0x8000
 #endif
@@ -5917,7 +6172,7 @@ typedef void (APIENTRYP PFNGLBLENDFUNCSEPARATEIPROC) (GLuint buf, GLenum srcRGB,
 
 #ifndef GL_VERSION_4_1
 #define GL_VERSION_4_1 1
-/* OpenGL 4.1 also reuses entry points from these extensions: */
+/* OpenGL 4.1 reuses entry points from these extensions: */
 /* ARB_ES2_compatibility */
 /* ARB_get_program_binary */
 /* ARB_separate_shader_objects */
@@ -5926,6 +6181,22 @@ typedef void (APIENTRYP PFNGLBLENDFUNCSEPARATEIPROC) (GLuint buf, GLenum srcRGB,
 /* ARB_viewport_array */
 #endif
 
+#ifndef GL_VERSION_4_2
+#define GL_VERSION_4_2 1
+/* OpenGL 4.2 reuses entry points from these extensions: */
+/* ARB_base_instance */
+/* ARB_shading_language_420pack (no entry points) */
+/* ARB_transform_feedback_instanced */
+/* ARB_compressed_texture_pixel_storage (no entry points) */
+/* ARB_conservative_depth (no entry points) */
+/* ARB_internalformat_query */
+/* ARB_map_buffer_alignment (no entry points) */
+/* ARB_shader_atomic_counters */
+/* ARB_shader_image_load_store */
+/* ARB_shading_language_packing (no entry points) */
+/* ARB_texture_storage */
+#endif
+
 #ifndef GL_ARB_multitexture
 #define GL_ARB_multitexture 1
 #ifdef GL_GLEXT_PROTOTYPES
@@ -6851,6 +7122,10 @@ typedef void (APIENTRYP PFNGLGETSAMPLERPARAMETERFVPROC) (GLuint sampler, GLenum
 typedef void (APIENTRYP PFNGLGETSAMPLERPARAMETERIUIVPROC) (GLuint sampler, GLenum pname, GLuint *params);
 #endif
 
+#ifndef GL_ARB_shader_bit_encoding
+#define GL_ARB_shader_bit_encoding 1
+#endif
+
 #ifndef GL_ARB_texture_rgb10_a2ui
 #define GL_ARB_texture_rgb10_a2ui 1
 #endif
@@ -7357,6 +7632,92 @@ typedef void (APIENTRYP PFNGLGETNUNIFORMDVARBPROC) (GLuint program, GLint locati
 #define GL_ARB_shader_stencil_export 1
 #endif
 
+#ifndef GL_ARB_base_instance
+#define GL_ARB_base_instance 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glDrawArraysInstancedBaseInstance (GLenum mode, GLint first, GLsizei count, GLsizei primcount, GLuint baseinstance);
+GLAPI void APIENTRY glDrawElementsInstancedBaseInstance (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei primcount, GLuint baseinstance);
+GLAPI void APIENTRY glDrawElementsInstancedBaseVertexBaseInstance (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei primcount, GLint basevertex, GLuint baseinstance);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLDRAWARRAYSINSTANCEDBASEINSTANCEPROC) (GLenum mode, GLint first, GLsizei count, GLsizei primcount, GLuint baseinstance);
+typedef void (APIENTRYP PFNGLDRAWELEMENTSINSTANCEDBASEINSTANCEPROC) (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei primcount, GLuint baseinstance);
+typedef void (APIENTRYP PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXBASEINSTANCEPROC) (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei primcount, GLint basevertex, GLuint baseinstance);
+#endif
+
+#ifndef GL_ARB_shading_language_420pack
+#define GL_ARB_shading_language_420pack 1
+#endif
+
+#ifndef GL_ARB_transform_feedback_instanced
+#define GL_ARB_transform_feedback_instanced 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glDrawTransformFeedbackInstanced (GLenum mode, GLuint id, GLsizei primcount);
+GLAPI void APIENTRY glDrawTransformFeedbackStreamInstanced (GLenum mode, GLuint id, GLuint stream, GLsizei primcount);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLDRAWTRANSFORMFEEDBACKINSTANCEDPROC) (GLenum mode, GLuint id, GLsizei primcount);
+typedef void (APIENTRYP PFNGLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC) (GLenum mode, GLuint id, GLuint stream, GLsizei primcount);
+#endif
+
+#ifndef GL_ARB_compressed_texture_pixel_storage
+#define GL_ARB_compressed_texture_pixel_storage 1
+#endif
+
+#ifndef GL_ARB_conservative_depth
+#define GL_ARB_conservative_depth 1
+#endif
+
+#ifndef GL_ARB_internalformat_query
+#define GL_ARB_internalformat_query 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glGetInternalformativ (GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint *params);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLGETINTERNALFORMATIVPROC) (GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint *params);
+#endif
+
+#ifndef GL_ARB_map_buffer_alignment
+#define GL_ARB_map_buffer_alignment 1
+#endif
+
+#ifndef GL_ARB_shader_atomic_counters
+#define GL_ARB_shader_atomic_counters 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glGetActiveAtomicCounterBufferiv (GLuint program, GLuint bufferIndex, GLenum pname, GLint *params);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLGETACTIVEATOMICCOUNTERBUFFERIVPROC) (GLuint program, GLuint bufferIndex, GLenum pname, GLint *params);
+#endif
+
+#ifndef GL_ARB_shader_image_load_store
+#define GL_ARB_shader_image_load_store 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glBindImageTexture (GLuint unit, GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum access, GLenum format);
+GLAPI void APIENTRY glMemoryBarrier (GLbitfield barriers);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLBINDIMAGETEXTUREPROC) (GLuint unit, GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum access, GLenum format);
+typedef void (APIENTRYP PFNGLMEMORYBARRIERPROC) (GLbitfield barriers);
+#endif
+
+#ifndef GL_ARB_shading_language_packing
+#define GL_ARB_shading_language_packing 1
+#endif
+
+#ifndef GL_ARB_texture_storage
+#define GL_ARB_texture_storage 1
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glTexStorage1D (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width);
+GLAPI void APIENTRY glTexStorage2D (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height);
+GLAPI void APIENTRY glTexStorage3D (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth);
+GLAPI void APIENTRY glTextureStorage1DEXT (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width);
+GLAPI void APIENTRY glTextureStorage2DEXT (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height);
+GLAPI void APIENTRY glTextureStorage3DEXT (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth);
+#endif /* GL_GLEXT_PROTOTYPES */
+typedef void (APIENTRYP PFNGLTEXSTORAGE1DPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width);
+typedef void (APIENTRYP PFNGLTEXSTORAGE2DPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height);
+typedef void (APIENTRYP PFNGLTEXSTORAGE3DPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth);
+typedef void (APIENTRYP PFNGLTEXTURESTORAGE1DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width);
+typedef void (APIENTRYP PFNGLTEXTURESTORAGE2DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height);
+typedef void (APIENTRYP PFNGLTEXTURESTORAGE3DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth);
+#endif
+
 #ifndef GL_EXT_abgr
 #define GL_EXT_abgr 1
 #endif

From ffb7d02154186402f64e0b628998485309774bb8 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sun, 7 Aug 2011 14:15:35 -0500
Subject: [PATCH 257/600] st/mesa: inline st_prepare_fragment_program in
 st_translate_fragment_program

This reverts an unnecessary part of commit 4683529048ee and fixes misrendering
and an assertion failure in Cogs.

Fixes freedesktop.org bug 39888.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/state_tracker/st_program.c | 326 ++++++++++++++--------------
 src/mesa/state_tracker/st_program.h |  15 --
 2 files changed, 162 insertions(+), 179 deletions(-)

diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index ca01d2e1976..a4f47edfcd3 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -416,151 +416,6 @@ st_get_vp_variant(struct st_context *st,
    return vpv;
 }
 
-/**
- * Translate Mesa fragment shader attributes to TGSI attributes.
- * \return GL_TRUE if color output should be written to all render targets, 
- *         GL_FALSE if not
- */
-GLboolean
-st_prepare_fragment_program(struct gl_context *ctx,
-                            struct st_fragment_program *stfp)
-{
-   GLuint attr;
-   const GLbitfield inputsRead = stfp->Base.Base.InputsRead;
-   GLboolean write_all = GL_FALSE;
-
-   /*
-    * Convert Mesa program inputs to TGSI input register semantics.
-    */
-   for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) {
-      if (inputsRead & (1 << attr)) {
-         const GLuint slot = stfp->num_inputs++;
-
-         stfp->input_to_index[attr] = slot;
-
-         switch (attr) {
-         case FRAG_ATTRIB_WPOS:
-            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
-            stfp->input_semantic_index[slot] = 0;
-            stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR;
-            break;
-         case FRAG_ATTRIB_COL0:
-            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
-            stfp->input_semantic_index[slot] = 0;
-            stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR;
-            break;
-         case FRAG_ATTRIB_COL1:
-            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
-            stfp->input_semantic_index[slot] = 1;
-            stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR;
-            break;
-         case FRAG_ATTRIB_FOGC:
-            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
-            stfp->input_semantic_index[slot] = 0;
-            stfp->interp_mode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
-            break;
-         case FRAG_ATTRIB_FACE:
-            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
-            stfp->input_semantic_index[slot] = 0;
-            stfp->interp_mode[slot] = TGSI_INTERPOLATE_CONSTANT;
-            break;
-            /* In most cases, there is nothing special about these
-             * inputs, so adopt a convention to use the generic
-             * semantic name and the mesa FRAG_ATTRIB_ number as the
-             * index. 
-             * 
-             * All that is required is that the vertex shader labels
-             * its own outputs similarly, and that the vertex shader
-             * generates at least every output required by the
-             * fragment shader plus fixed-function hardware (such as
-             * BFC).
-             * 
-             * There is no requirement that semantic indexes start at
-             * zero or be restricted to a particular range -- nobody
-             * should be building tables based on semantic index.
-             */
-         case FRAG_ATTRIB_PNTC:
-         case FRAG_ATTRIB_TEX0:
-         case FRAG_ATTRIB_TEX1:
-         case FRAG_ATTRIB_TEX2:
-         case FRAG_ATTRIB_TEX3:
-         case FRAG_ATTRIB_TEX4:
-         case FRAG_ATTRIB_TEX5:
-         case FRAG_ATTRIB_TEX6:
-         case FRAG_ATTRIB_TEX7:
-         case FRAG_ATTRIB_VAR0:
-         default:
-            /* Actually, let's try and zero-base this just for
-             * readability of the generated TGSI.
-             */
-            assert(attr >= FRAG_ATTRIB_TEX0);
-            stfp->input_semantic_index[slot] = (attr - FRAG_ATTRIB_TEX0);
-            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
-            if (attr == FRAG_ATTRIB_PNTC)
-               stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR;
-            else
-               stfp->interp_mode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
-            break;
-         }
-      }
-      else {
-         stfp->input_to_index[attr] = -1;
-      }
-   }
-
-   /*
-    * Semantics and mapping for outputs
-    */
-   {
-      uint numColors = 0;
-      GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten;
-
-      /* if z is written, emit that first */
-      if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
-         stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_POSITION;
-         stfp->output_semantic_index[stfp->num_outputs] = 0;
-         stfp->result_to_output[FRAG_RESULT_DEPTH] = stfp->num_outputs;
-         stfp->num_outputs++;
-         outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
-      }
-
-      if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
-         stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_STENCIL;
-         stfp->output_semantic_index[stfp->num_outputs] = 0;
-         stfp->result_to_output[FRAG_RESULT_STENCIL] = stfp->num_outputs;
-         stfp->num_outputs++;
-         outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
-      }
-
-      /* handle remaning outputs (color) */
-      for (attr = 0; attr < FRAG_RESULT_MAX; attr++) {
-         if (outputsWritten & BITFIELD64_BIT(attr)) {
-            switch (attr) {
-            case FRAG_RESULT_DEPTH:
-            case FRAG_RESULT_STENCIL:
-               /* handled above */
-               assert(0);
-               break;
-            case FRAG_RESULT_COLOR:
-               write_all = GL_TRUE; /* fallthrough */
-            default:
-               assert(attr == FRAG_RESULT_COLOR ||
-                      (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX));
-               stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_COLOR;
-               stfp->output_semantic_index[stfp->num_outputs] = numColors;
-               stfp->result_to_output[attr] = stfp->num_outputs;
-               numColors++;
-               break;
-            }
-
-            stfp->num_outputs++;
-         }
-      }
-   }
-   
-   return write_all;
-}
-
 
 /**
  * Translate a Mesa fragment shader into a TGSI shader using extra info in
@@ -613,12 +468,155 @@ st_translate_fragment_program(struct st_context *st,
 
    if (!stfp->tgsi.tokens) {
       /* need to translate Mesa instructions to TGSI now */
+      GLuint outputMapping[FRAG_RESULT_MAX];
+      GLuint inputMapping[FRAG_ATTRIB_MAX];
+      GLuint interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
+      GLuint attr;
+      const GLbitfield inputsRead = stfp->Base.Base.InputsRead;
       struct ureg_program *ureg;
-      GLboolean write_all = st_prepare_fragment_program(st->ctx, stfp);
+
+      GLboolean write_all = GL_FALSE;
+
+      ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
+      ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
+      uint fs_num_inputs = 0;
+
+      ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
+      ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
+      uint fs_num_outputs = 0;
       
       if (!stfp->glsl_to_tgsi)
          _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT);
 
+      /*
+       * Convert Mesa program inputs to TGSI input register semantics.
+       */
+      for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) {
+         if (inputsRead & (1 << attr)) {
+            const GLuint slot = fs_num_inputs++;
+
+            inputMapping[attr] = slot;
+
+            switch (attr) {
+            case FRAG_ATTRIB_WPOS:
+               input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
+               input_semantic_index[slot] = 0;
+               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
+               break;
+            case FRAG_ATTRIB_COL0:
+               input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+               input_semantic_index[slot] = 0;
+               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
+               break;
+            case FRAG_ATTRIB_COL1:
+               input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+               input_semantic_index[slot] = 1;
+               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
+               break;
+            case FRAG_ATTRIB_FOGC:
+               input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
+               input_semantic_index[slot] = 0;
+               interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
+               break;
+            case FRAG_ATTRIB_FACE:
+               input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
+               input_semantic_index[slot] = 0;
+               interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
+               break;
+               /* In most cases, there is nothing special about these
+                * inputs, so adopt a convention to use the generic
+                * semantic name and the mesa FRAG_ATTRIB_ number as the
+                * index. 
+                * 
+                * All that is required is that the vertex shader labels
+                * its own outputs similarly, and that the vertex shader
+                * generates at least every output required by the
+                * fragment shader plus fixed-function hardware (such as
+                * BFC).
+                * 
+                * There is no requirement that semantic indexes start at
+                * zero or be restricted to a particular range -- nobody
+                * should be building tables based on semantic index.
+                */
+            case FRAG_ATTRIB_PNTC:
+            case FRAG_ATTRIB_TEX0:
+            case FRAG_ATTRIB_TEX1:
+            case FRAG_ATTRIB_TEX2:
+            case FRAG_ATTRIB_TEX3:
+            case FRAG_ATTRIB_TEX4:
+            case FRAG_ATTRIB_TEX5:
+            case FRAG_ATTRIB_TEX6:
+            case FRAG_ATTRIB_TEX7:
+            case FRAG_ATTRIB_VAR0:
+            default:
+               /* Actually, let's try and zero-base this just for
+                * readability of the generated TGSI.
+                */
+               assert(attr >= FRAG_ATTRIB_TEX0);
+               input_semantic_index[slot] = (attr - FRAG_ATTRIB_TEX0);
+               input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
+               if (attr == FRAG_ATTRIB_PNTC)
+                  interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
+               else
+                  interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
+               break;
+            }
+         }
+         else {
+            inputMapping[attr] = -1;
+         }
+      }
+
+      /*
+       * Semantics and mapping for outputs
+       */
+      {
+         uint numColors = 0;
+         GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten;
+
+         /* if z is written, emit that first */
+         if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
+            fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
+            fs_output_semantic_index[fs_num_outputs] = 0;
+            outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
+            fs_num_outputs++;
+            outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
+         }
+
+         if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
+            fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
+            fs_output_semantic_index[fs_num_outputs] = 0;
+            outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
+            fs_num_outputs++;
+            outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
+         }
+
+         /* handle remaning outputs (color) */
+         for (attr = 0; attr < FRAG_RESULT_MAX; attr++) {
+            if (outputsWritten & BITFIELD64_BIT(attr)) {
+               switch (attr) {
+               case FRAG_RESULT_DEPTH:
+               case FRAG_RESULT_STENCIL:
+                  /* handled above */
+                  assert(0);
+                  break;
+               case FRAG_RESULT_COLOR:
+                  write_all = GL_TRUE; /* fallthrough */
+               default:
+                  assert(attr == FRAG_RESULT_COLOR ||
+                         (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX));
+                  fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
+                  fs_output_semantic_index[fs_num_outputs] = numColors;
+                  outputMapping[attr] = fs_num_outputs;
+                  numColors++;
+                  break;
+               }
+
+               fs_num_outputs++;
+            }
+         }
+      }
+
       ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT );
       if (ureg == NULL)
          return NULL;
@@ -638,32 +636,32 @@ st_translate_fragment_program(struct st_context *st,
                               stfp->glsl_to_tgsi,
                               &stfp->Base.Base,
                               /* inputs */
-                              stfp->num_inputs,
-                              stfp->input_to_index,
-                              stfp->input_semantic_name,
-                              stfp->input_semantic_index,
-                              stfp->interp_mode,
+                              fs_num_inputs,
+                              inputMapping,
+                              input_semantic_name,
+                              input_semantic_index,
+                              interpMode,
                               /* outputs */
-                              stfp->num_outputs,
-                              stfp->result_to_output,
-                              stfp->output_semantic_name,
-                              stfp->output_semantic_index, FALSE );
+                              fs_num_outputs,
+                              outputMapping,
+                              fs_output_semantic_name,
+                              fs_output_semantic_index, FALSE );
       else
          st_translate_mesa_program(st->ctx,
                                    TGSI_PROCESSOR_FRAGMENT,
                                    ureg,
                                    &stfp->Base.Base,
                                    /* inputs */
-                                   stfp->num_inputs,
-                                   stfp->input_to_index,
-                                   stfp->input_semantic_name,
-                                   stfp->input_semantic_index,
-                                   stfp->interp_mode,
+                                   fs_num_inputs,
+                                   inputMapping,
+                                   input_semantic_name,
+                                   input_semantic_index,
+                                   interpMode,
                                    /* outputs */
-                                   stfp->num_outputs,
-                                   stfp->result_to_output,
-                                   stfp->output_semantic_name,
-                                   stfp->output_semantic_index, FALSE );
+                                   fs_num_outputs,
+                                   outputMapping,
+                                   fs_output_semantic_name,
+                                   fs_output_semantic_index, FALSE );
 
       stfp->tgsi.tokens = ureg_get_tokens( ureg, NULL );
       ureg_destroy( ureg );
diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
index 67723de6d53..699b6e8ccb7 100644
--- a/src/mesa/state_tracker/st_program.h
+++ b/src/mesa/state_tracker/st_program.h
@@ -85,21 +85,6 @@ struct st_fragment_program
 {
    struct gl_fragment_program Base;
    struct glsl_to_tgsi_visitor* glsl_to_tgsi;
-   
-   /** maps a Mesa FRAG_ATTRIB_x to a packed TGSI input index */
-   GLuint input_to_index[FRAG_ATTRIB_MAX];
-   /** maps a TGSI input index back to a Mesa FRAG_ATTRIB_x */
-   GLuint index_to_input[PIPE_MAX_SHADER_INPUTS];
-   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
-   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
-   GLuint num_inputs;
-   GLuint interp_mode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
-
-   /** Maps FRAG_RESULT_x to slot */
-   GLuint result_to_output[FRAG_RESULT_MAX];
-   ubyte output_semantic_name[FRAG_RESULT_MAX];
-   ubyte output_semantic_index[FRAG_RESULT_MAX];
-   GLuint num_outputs;
 
    struct pipe_shader_state tgsi;
 

From 482338842db6ad387316b52fbe9602eee56ad082 Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Mon, 1 Aug 2011 13:06:06 -0700
Subject: [PATCH 258/600] Revert "glsl: Skip processing the first function's
 body in do_dead_functions()."

opt_dead_functions contained a shortcut to skip processing the first
function's body, based on the assumption that IR functions are
topologically sorted, with callees always coming before their callers
(therefore the first function cannot contain any calls).

This assumption turns out not to be true in general.  For example, the
following code snippet gets translated to IR that violates this
assumption:

    void f();
    void g();
    void f() { g(); }
    void g() { ... }

In practice, the shortcut didn't cause bugs because of a coincidence
of the circumstances in which opt_dead_functions is called:

(a) we do inlining right before dead function elimination, and
    inlining (when successful) eliminates all calls.

(b) for user-defined functions, inlining is always successful, because
    previous optimization passes (during compilation) have reduced
    them to a form that is eligible for inlining.

(c) the function that appears first in the IR can't possibly call a
    built-in function, because built-in functions are always emitted
    before the function that calls them.

It seems unnecessarily fragile to have opt_dead_functions depend on
these coincidences.  And the next patch in this series will break (c).
So I'm reverting the shortcut.  The consequence will be a slight
increase in link time for complex shaders.

This reverts commit c75427f4c8767e131e5fb3de44fbc9d904cb992d.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/opt_dead_functions.cpp | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/src/glsl/opt_dead_functions.cpp b/src/glsl/opt_dead_functions.cpp
index 7c64c618c0c..51c77e3b947 100644
--- a/src/glsl/opt_dead_functions.cpp
+++ b/src/glsl/opt_dead_functions.cpp
@@ -50,7 +50,6 @@ public:
    ir_dead_functions_visitor()
    {
       this->mem_ctx = ralloc_context(NULL);
-      this->seen_another_function_signature = false;
    }
 
    ~ir_dead_functions_visitor()
@@ -65,8 +64,6 @@ public:
 
    bool (*predicate)(ir_instruction *ir);
 
-   bool seen_another_function_signature;
-
    /* List of signature_entry */
    exec_list signature_list;
    void *mem_ctx;
@@ -97,13 +94,7 @@ ir_dead_functions_visitor::visit_enter(ir_function_signature *ir)
       entry->used = true;
    }
 
-   /* If this is the first signature to look at, no need to descend to see
-    * if it has calls to another function signature.
-    */
-   if (!this->seen_another_function_signature) {
-      this->seen_another_function_signature = true;
-      return visit_continue_with_parent;
-   }
+
 
    return visit_continue;
 }

From 0d81b0e18494a80c4326fbc98837842959675869 Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Fri, 29 Jul 2011 15:28:52 -0700
Subject: [PATCH 259/600] glsl: Emit function signatures at toplevel, even for
 built-ins.

The ast-to-hir conversion needs to emit function signatures in two
circumstances: when a function declaration (or definition) is
encountered, and when a built-in function is encountered.

To avoid emitting a function signature in an illegal place (such as
inside a function), emit_function() checked whether we were inside a
function definition, and if so, emitted the signature before the
function definition.

However, this didn't cover the case of emitting function signatures
for built-in functions when those built-in functions are called from
inside the constant integer expression that specifies the length of a
global array.  This failed because when processing an array length, we
are emitting IR into a dummy exec_list (see process_array_type() in
ast_to_hir.cpp).  process_array_type() later checks (via an assertion)
that no instructions were emitted to the dummy exec_list, based on the
reasonable assumption that we shouldn't need to emit instructions to
calculate the value of a constant.

This patch changes emit_function() so that it emits function
signatures at toplevel in all cases.

This partially fixes bug 38625
(https://bugs.freedesktop.org/show_bug.cgi?id=38625).  The remainder
of the fix is in the patch that follows.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/ast.h                |  3 +--
 src/glsl/ast_function.cpp     |  2 +-
 src/glsl/ast_to_hir.cpp       | 31 ++++++++++++++-----------------
 src/glsl/glsl_parser_extras.h |  6 ++++++
 4 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index 878f48b2070..d1de2271873 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -730,7 +730,6 @@ _mesa_ast_field_selection_to_hir(const ast_expression *expr,
 				 struct _mesa_glsl_parse_state *state);
 
 void
-emit_function(_mesa_glsl_parse_state *state, exec_list *instructions,
-	      ir_function *f);
+emit_function(_mesa_glsl_parse_state *state, ir_function *f);
 
 #endif /* AST_H */
diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
index 8bcf48dfd91..34a82f8ab75 100644
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -125,7 +125,7 @@ match_function_by_name(exec_list *instructions, const char *name,
 	    if (f == NULL) {
 	       f = new(ctx) ir_function(name);
 	       state->symbols->add_global_function(f);
-	       emit_function(state, instructions, f);
+	       emit_function(state, f);
 	    }
 
 	    f->add_signature(sig->clone_prototype(f, NULL));
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 7da14611950..a6a0c328314 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -66,6 +66,8 @@ _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
 
    state->current_function = NULL;
 
+   state->toplevel_ir = instructions;
+
    /* Section 4.2 of the GLSL 1.20 specification states:
     * "The built-in functions are scoped in a scope outside the global scope
     *  users declare global variables in.  That is, a shader's global scope,
@@ -85,6 +87,8 @@ _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
       ast->hir(instructions, state);
 
    detect_recursion_unlinked(state, instructions);
+
+   state->toplevel_ir = NULL;
 }
 
 
@@ -2926,23 +2930,16 @@ ast_parameter_declarator::parameters_to_hir(exec_list *ast_parameters,
 
 
 void
-emit_function(_mesa_glsl_parse_state *state, exec_list *instructions,
-	      ir_function *f)
+emit_function(_mesa_glsl_parse_state *state, ir_function *f)
 {
-   /* Emit the new function header */
-   if (state->current_function == NULL) {
-      instructions->push_tail(f);
-   } else {
-      /* IR invariants disallow function declarations or definitions nested
-       * within other function definitions.  Insert the new ir_function
-       * block in the instruction sequence before the ir_function block
-       * containing the current ir_function_signature.
-       */
-      ir_function *const curr =
-	 const_cast<ir_function *>(state->current_function->function());
-
-      curr->insert_before(f);
-   }
+   /* IR invariants disallow function declarations or definitions
+    * nested within other function definitions.  But there is no
+    * requirement about the relative order of function declarations
+    * and definitions with respect to one another.  So simply insert
+    * the new ir_function block at the end of the toplevel instruction
+    * list.
+    */
+   state->toplevel_ir->push_tail(f);
 }
 
 
@@ -3069,7 +3066,7 @@ ast_function::hir(exec_list *instructions,
 	 return NULL;
       }
 
-      emit_function(state, instructions, f);
+      emit_function(state, f);
    }
 
    /* Verify the return type of main() */
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index 2f4d3cba77f..fc392da5b21 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -129,6 +129,12 @@ struct _mesa_glsl_parse_state {
     */
    class ir_function_signature *current_function;
 
+   /**
+    * During AST to IR conversion, pointer to the toplevel IR
+    * instruction list being generated.
+    */
+   exec_list *toplevel_ir;
+
    /** Have we found a return statement in this function? */
    bool found_return;
 

From 789ee6516bfca289e1948ff8f2c147b94286a0e0 Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Sat, 30 Jul 2011 11:55:53 -0700
Subject: [PATCH 260/600] glsl: Constant-fold built-in functions before
 outputting IR

Rearranged the logic for converting the ast for a function call to
hir, so that we constant fold before emitting any IR.  Previously we
would emit some IR, and then only later detect whether we could
constant fold.  The unnecessary IR would usually get cleaned up by a
later optimization step, however in the case of a builtin function
being used to compute an array size, it was causing an assertion.

Fixes Piglit test array-size-constant-relational.vert.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38625
---
 src/glsl/ast_function.cpp | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
index 34a82f8ab75..5b6ed3bc8f5 100644
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -199,6 +199,20 @@ match_function_by_name(exec_list *instructions, const char *name,
        */
       ir_call *call = new(ctx) ir_call(sig, actual_parameters);
       if (!sig->return_type->is_void()) {
+         /* If the function call is a constant expression, don't
+          * generate the instructions to call it; just generate an
+          * ir_constant representing the constant value.
+          *
+          * Function calls can only be constant expressions starting
+          * in GLSL 1.20.
+          */
+         if (state->language_version >= 120) {
+            ir_constant *const_val = call->constant_expression_value();
+            if (const_val) {
+               return const_val;
+            }
+         }
+
 	 ir_variable *var;
 	 ir_dereference_variable *deref;
 
@@ -211,8 +225,6 @@ match_function_by_name(exec_list *instructions, const char *name,
 	 deref = new(ctx) ir_dereference_variable(var);
 	 ir_assignment *assign = new(ctx) ir_assignment(deref, call, NULL);
 	 instructions->push_tail(assign);
-	 if (state->language_version >= 120)
-	    var->constant_value = call->constant_expression_value();
 
 	 deref = new(ctx) ir_dereference_variable(var);
 	 return deref;

From d4144a123b603d3c33cb356cf3c8e5ae4653594e Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Mon, 1 Aug 2011 15:23:07 -0700
Subject: [PATCH 261/600] glsl: Check array size is const before asserting that
 no IR was generated.

process_array_type() contains an assertion to verify that no IR
instructions are generated while processing the expression that
specifies the size of the array.  This assertion needs to happen
_after_ checking whether the expression is constant.  Otherwise we may
crash on an illegal shader rather than reporting an error.

Fixes piglit tests array-size-non-builtin-function.vert and
array-size-with-side-effect.vert.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/ast_to_hir.cpp | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index a6a0c328314..2025911acd3 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -1769,11 +1769,6 @@ process_array_type(YYLTYPE *loc, const glsl_type *base, ast_node *array_size,
       ir_rvalue *const ir = array_size->hir(& dummy_instructions, state);
       YYLTYPE loc = array_size->get_location();
 
-      /* FINISHME: Verify that the grammar forbids side-effects in array
-       * FINISHME: sizes.   i.e., 'vec4 [x = 12] data'
-       */
-      assert(dummy_instructions.is_empty());
-
       if (ir != NULL) {
 	 if (!ir->type->is_integer()) {
 	    _mesa_glsl_error(& loc, state, "array size must be integer type");
@@ -1790,6 +1785,14 @@ process_array_type(YYLTYPE *loc, const glsl_type *base, ast_node *array_size,
 	    } else {
 	       assert(size->type == ir->type);
 	       length = size->value.u[0];
+
+               /* If the array size is const (and we've verified that
+                * it is) then no instructions should have been emitted
+                * when we converted it to HIR.  If they were emitted,
+                * then either the array size isn't const after all, or
+                * we are emitting unnecessary instructions.
+                */
+               assert(dummy_instructions.is_empty());
 	    }
 	 }
       }

From 01a851c296347d8e9d2166b3c83eab97404c0670 Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Wed, 3 Aug 2011 16:16:59 -0700
Subject: [PATCH 262/600] glsl: When linking, emit functions at the tail of the
 final linked program.

When link_functions.cpp adds a new function to the final linked
program, it needs to add it after any global variable declarations
that the function refers to, otherwise the IR will be invalid (because
variable declarations must occur before variable accesses).  The
easiest way to do that is to have the linker emit functions to the
tail of the final linked program.

The linker used to emit functions to the head of the final linked
program, in an effort to keep callees sorted before their callers.
However, this was not reliable: it didn't work for functions declared
or defined in the same compilation unit as main, for diamond-shaped
patterns in the call graph, or for some obscure cases involving
overloaded functions.  And no code currently relies on this sort
order.

No Piglit regressions with i965 Ironlake.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/link_functions.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/glsl/link_functions.cpp b/src/glsl/link_functions.cpp
index d40f771e342..acee3271249 100644
--- a/src/glsl/link_functions.cpp
+++ b/src/glsl/link_functions.cpp
@@ -104,10 +104,12 @@ public:
       if (f == NULL) {
 	 f = new(linked) ir_function(name);
 
-	 /* Add the new function to the linked IR.
+	 /* Add the new function to the linked IR.  Put it at the end
+          * so that it comes after any global variable declarations
+          * that it refers to.
 	  */
 	 linked->symbols->add_function(f);
-	 linked->ir->push_head(f);
+	 linked->ir->push_tail(f);
       }
 
       ir_function_signature *linked_sig =

From c148ef6ddb3dbf256c26d82ed2f45f1fde55a231 Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Wed, 3 Aug 2011 15:37:01 -0700
Subject: [PATCH 263/600] glsl: validate IR after linking (debug builds only)

At least one of the invariants verified by IR validation concerns the
relative ordering of toplevel constructs in the IR: references to
global variables must come after the declarations of those global
variables.

Since linking affects the ordering of toplevel constructs in the IR,
it's possible that a bug in the linker will cause invalid IR to be
generated, even if all the pre-linked shaders are valid.  (In fact,
such a bug was fixed by the previous commit.)

Bugs like this are easily masked by further optimization passes,
particularly inlining.  So to make them easier to track down, this
patch addes an IR validation step right after linking, and before
final optimization occurs.  The validation only occurs on debug
builds.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/linker.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 19eb9b5ff6f..b54ef41080a 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -922,6 +922,14 @@ link_intrastage_shaders(void *mem_ctx,
 
    free(linking_shaders);
 
+#ifdef DEBUG
+   /* At this point linked should contain all of the linked IR, so
+    * validate it to make sure nothing went wrong.
+    */
+   if (linked)
+      validate_ir_tree(linked->ir);
+#endif
+
    /* Make a pass over all variable declarations to ensure that arrays with
     * unspecified sizes have a size specified.  The size is inferred from the
     * max_array_access field.

From 36291173c20b7b90da8e765871efb37205786922 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@gmail.com>
Date: Tue, 9 Aug 2011 10:39:52 +0100
Subject: [PATCH 264/600] docs: update GL3.txt with new GL 4.2 extensions

---
 docs/GL3.txt | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 135bc4bab67..c0cc4d172e0 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -114,6 +114,19 @@ GL_ARB_vertex_attrib_64bit                           not started
 GL_ARB_viewport_array                                not started
 
 
+GL 4.2:
+GLSL 4.2                                             not started
+GL_ARB_texture_compression_bptc                      not started
+GL_ARB_compressed_texture_pixel_storage              not started
+GL_ARB_shader_atomic_counters                        not started
+GL_ARB_texture_storage                               not started
+GL_ARB_transform_feedback_instanced                  not started
+GL_ARB_base_instance                                 not started
+GL_ARB_shader_image_load_store                       not started
+GL_ARB_conservative_depth                            not started (may be close to AMD_conservative_depth though)
+GL_ARB_shading_language_420pack                      not started
+GL_ARB_internalformat_query                          not started
+GL_ARB_map_buffer_alignment                          not started
 
 
 More info about these features and the work involved can be found at

From afd1d857752b5c30a3082068f8bb9002e0c69699 Mon Sep 17 00:00:00 2001
From: Fabio Pedretti <fabio.ped@libero.it>
Date: Tue, 9 Aug 2011 08:08:59 -0600
Subject: [PATCH 265/600] swrast: silence unused var warnings

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/swrast/s_span.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c
index db102ac7946..9a91be39970 100644
--- a/src/mesa/swrast/s_span.c
+++ b/src/mesa/swrast/s_span.c
@@ -212,10 +212,10 @@ interpolate_active_attribs(struct gl_context *ctx, SWspan *span, GLbitfield attr
 static INLINE void
 interpolate_int_colors(struct gl_context *ctx, SWspan *span)
 {
+#if CHAN_BITS != 32
    const GLuint n = span->end;
    GLuint i;
 
-#if CHAN_BITS != 32
    ASSERT(!(span->arrayMask & SPAN_RGBA));
 #endif
 

From e0496b63ff0d41a36812b78e9062e92590fcdd55 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 9 Aug 2011 08:58:20 -0600
Subject: [PATCH 266/600] glx: move declarations before code

---
 src/glx/dri2_glx.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/glx/dri2_glx.c b/src/glx/dri2_glx.c
index 80e4da30beb..d9524d765bd 100644
--- a/src/glx/dri2_glx.c
+++ b/src/glx/dri2_glx.c
@@ -455,16 +455,20 @@ dri2_wait_gl(struct glx_context *gc)
 static void
 dri2FlushFrontBuffer(__DRIdrawable *driDrawable, void *loaderPrivate)
 {
+   struct glx_display *priv;
+   struct dri2_display *pdp;
+   struct glx_context *gc;
    struct dri2_drawable *pdraw = loaderPrivate;
+
    if (!pdraw)
       return;
 
    if (!pdraw->base.psc)
       return;
 
-   struct glx_display *priv = __glXInitialize(pdraw->base.psc->dpy);
-   struct dri2_display *pdp = (struct dri2_display *)priv->dri2Display;
-   struct glx_context *gc = __glXGetCurrentContext();
+   priv = __glXInitialize(pdraw->base.psc->dpy);
+   pdp = (struct dri2_display *) priv->dri2Display;
+   gc = __glXGetCurrentContext();
 
    /* Old servers don't send invalidate events */
    if (!pdp->invalidateAvailable)

From 971905bf394e4a6342c206d170abd6661ea38e21 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 9 Aug 2011 08:58:47 -0600
Subject: [PATCH 267/600] svga: add missing switch case for
 PIPE_SHADER_CAP_INTEGERS

---
 src/gallium/drivers/svga/svga_screen.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index 4be10ef5821..e0c11590df0 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -245,6 +245,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
          return 0;
       case PIPE_SHADER_CAP_SUBROUTINES:
          return 0;
+      case PIPE_SHADER_CAP_INTEGERS:
+         return 0;
       }
       break;
    case PIPE_SHADER_VERTEX:

From 32faaea743ca74f4ba29184ef44ebf2c0e962a46 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 9 Aug 2011 09:00:29 -0600
Subject: [PATCH 268/600] r300g: silence some warnings about uninitialized
 variables

---
 src/gallium/drivers/r300/compiler/radeon_program_alu.c | 2 +-
 src/gallium/drivers/r300/compiler/radeon_program_tex.c | 4 ++--
 src/gallium/drivers/r300/r300_blit.c                   | 8 +++++---
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
index 9fc991166a3..e273bc40c26 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program_alu.c
+++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
@@ -87,7 +87,7 @@ static struct rc_instruction *emit3(
 
 static struct rc_dst_register dstregtmpmask(int index, int mask)
 {
-	struct rc_dst_register dst = {0};
+	struct rc_dst_register dst = {0, 0, 0};
 	dst.File = RC_FILE_TEMPORARY;
 	dst.Index = index;
 	dst.WriteMask = mask;
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_tex.c b/src/gallium/drivers/r300/compiler/radeon_program_tex.c
index 8d16b2cf9ec..9d69ebd18fb 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program_tex.c
+++ b/src/gallium/drivers/r300/compiler/radeon_program_tex.c
@@ -35,7 +35,7 @@
 static struct rc_src_register shadow_fail_value(struct r300_fragment_program_compiler *compiler,
 						int tmu)
 {
-	struct rc_src_register reg = { 0, };
+	struct rc_src_register reg = { 0, 0, 0, 0, 0, 0 };
 
 	if (compiler->enable_shadow_ambient) {
 		reg.File = RC_FILE_CONSTANT;
@@ -55,7 +55,7 @@ static struct rc_src_register shadow_fail_value(struct r300_fragment_program_com
 static struct rc_src_register shadow_pass_value(struct r300_fragment_program_compiler *compiler,
 						int tmu)
 {
-	struct rc_src_register reg = { 0, };
+	struct rc_src_register reg = { 0, 0, 0, 0, 0, 0 };
 
 	reg.File = RC_FILE_NONE;
 	reg.Swizzle = combine_swizzles(RC_SWIZZLE_1111,
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index db97e496e19..e7a926829d1 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -409,10 +409,11 @@ void r300_decompress_zmask(struct r300_context *r300)
 
 void r300_decompress_zmask_locked_unsafe(struct r300_context *r300)
 {
-    struct pipe_framebuffer_state fb = {0};
+    struct pipe_framebuffer_state fb;
+
+    memset(&fb, 0, sizeof(fb));
     fb.width = r300->locked_zbuffer->width;
     fb.height = r300->locked_zbuffer->height;
-    fb.nr_cbufs = 0;
     fb.zsbuf = r300->locked_zbuffer;
 
     r300->context.set_framebuffer_state(&r300->context, &fb);
@@ -421,8 +422,9 @@ void r300_decompress_zmask_locked_unsafe(struct r300_context *r300)
 
 void r300_decompress_zmask_locked(struct r300_context *r300)
 {
-    struct pipe_framebuffer_state saved_fb = {0};
+    struct pipe_framebuffer_state saved_fb;
 
+    memset(&saved_fb, 0, sizeof(saved_fb));
     util_copy_framebuffer_state(&saved_fb, r300->fb_state.state);
     r300_decompress_zmask_locked_unsafe(r300);
     r300->context.set_framebuffer_state(&r300->context, &saved_fb);

From e6c64800cc8833fb4083a556c839b51e8ac84a8b Mon Sep 17 00:00:00 2001
From: Henri Verbeet <hverbeet@gmail.com>
Date: Tue, 9 Aug 2011 12:23:47 -0500
Subject: [PATCH 269/600] glsl_to_tgsi: improve assignment hack

Fixes StarCraft 2 and Fallout 3 in Wine.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index d7a1ba80e1d..aef23e7d207 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1994,15 +1994,17 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
    } else if (ir->rhs->as_expression() &&
               this->instructions.get_tail() &&
               ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
-              type_size(ir->lhs->type) == 1) {
+              type_size(ir->lhs->type) == 1 &&
+              l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) {
       /* To avoid emitting an extra MOV when assigning an expression to a 
        * variable, emit the last instruction of the expression again, but
        * replace the destination register with the target of the assignment.
        * Dead code elimination will remove the original instruction.
        */
-      glsl_to_tgsi_instruction *inst;
+      glsl_to_tgsi_instruction *inst, *new_inst;
       inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
-      emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
+      new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
+      new_inst->saturate = inst->saturate;
    } else {
       for (i = 0; i < type_size(ir->lhs->type); i++) {
          emit(ir, TGSI_OPCODE_MOV, l, r);

From fa43477fa33c068915283d511b64e3d6470ccd73 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 4 May 2011 13:27:33 -0700
Subject: [PATCH 270/600] mesa: Add a convenience interface for register
 allocator conflicts setup.

---
 src/mesa/program/register_allocate.c | 21 +++++++++++++++++++++
 src/mesa/program/register_allocate.h |  2 ++
 2 files changed, 23 insertions(+)

diff --git a/src/mesa/program/register_allocate.c b/src/mesa/program/register_allocate.c
index de96eb42c9b..f5b5174fc18 100644
--- a/src/mesa/program/register_allocate.c
+++ b/src/mesa/program/register_allocate.c
@@ -200,6 +200,27 @@ ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2)
    }
 }
 
+/**
+ * Adds a conflict between base_reg and reg, and also between reg and
+ * anything that base_reg conflicts with.
+ *
+ * This can simplify code for setting up multiple register classes
+ * which are aggregates of some base hardware registers, compared to
+ * explicitly using ra_add_reg_conflict.
+ */
+void
+ra_add_transitive_reg_conflict(struct ra_regs *regs,
+			       unsigned int base_reg, unsigned int reg)
+{
+   int i;
+
+   ra_add_reg_conflict(regs, reg, base_reg);
+
+   for (i = 0; i < regs->regs[base_reg].num_conflicts; i++) {
+      ra_add_reg_conflict(regs, reg, regs->regs[base_reg].conflict_list[i]);
+   }
+}
+
 unsigned int
 ra_alloc_reg_class(struct ra_regs *regs)
 {
diff --git a/src/mesa/program/register_allocate.h b/src/mesa/program/register_allocate.h
index 5b95833f394..ee2e58a4756 100644
--- a/src/mesa/program/register_allocate.h
+++ b/src/mesa/program/register_allocate.h
@@ -40,6 +40,8 @@ struct ra_regs *ra_alloc_reg_set(unsigned int count);
 unsigned int ra_alloc_reg_class(struct ra_regs *regs);
 void ra_add_reg_conflict(struct ra_regs *regs,
 			 unsigned int r1, unsigned int r2);
+void ra_add_transitive_reg_conflict(struct ra_regs *regs,
+				    unsigned int base_reg, unsigned int reg);
 void ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int reg);
 void ra_set_finalize(struct ra_regs *regs);
 /** @} */

From bbcf13adbe525bd389a65ba15dd7831a56b8b13c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 4 May 2011 13:31:01 -0700
Subject: [PATCH 271/600] i965/fs: Use the new convenience interface for
 setting up reg conflicts.

That code I wrote was impenetrable, and hard to write the first time.
This makes things a lot more obvious.
---
 .../drivers/dri/i965/brw_fs_reg_allocate.cpp  | 27 +++++--------------
 1 file changed, 6 insertions(+), 21 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 78daa491156..f246ac49660 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -157,29 +157,14 @@ fs_visitor::assign_regs()
       classes[i] = ra_alloc_reg_class(regs);
 
       for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
-	 ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r);
-      }
+	 int class_reg = class_base_reg[i] + i_r;
 
-      /* Add conflicts between our contiguous registers aliasing
-       * base regs and other register classes' contiguous registers
-       * that alias base regs, or the base regs themselves for classes[0].
-       */
-      for (int c = 0; c <= i; c++) {
-	 for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
-	    for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1));
-		 c_r < MIN2(class_reg_count[c], i_r + class_sizes[i]);
-		 c_r++) {
+	 ra_class_add_reg(regs, classes[i], class_reg);
 
-	       if (0) {
-		  printf("%d/%d conflicts %d/%d\n",
-			 class_sizes[i], first_assigned_grf + i_r,
-			 class_sizes[c], first_assigned_grf + c_r);
-	       }
-
-	       ra_add_reg_conflict(regs,
-				   class_base_reg[i] + i_r,
-				   class_base_reg[c] + c_r);
-	    }
+	 for (int base_reg = i_r;
+	      base_reg < i_r + class_sizes[i];
+	      base_reg++) {
+	    ra_add_transitive_reg_conflict(regs, base_reg, class_reg);
 	 }
       }
    }

From b76378d46a211521582cfab56dc05031a57502a6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 4 May 2011 13:50:13 -0700
Subject: [PATCH 272/600] i965/fs: Eliminate the magic nature of virtual GRF 0.

This was a debugging aid at one point -- virtual grf 0 should never be
allocated, and it would be used if undefined register access occurred
in codegen.  However, it made the confusing register allocation code
even more confusing by indexing things off of 1 all over.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp          |  9 ++---
 src/mesa/drivers/dri/i965/brw_fs.h            |  2 +-
 .../drivers/dri/i965/brw_fs_reg_allocate.cpp  | 33 +++++++------------
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp  |  4 +--
 4 files changed, 17 insertions(+), 31 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index f55be022f72..d57a67cc4fc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -181,9 +181,6 @@ fs_visitor::virtual_grf_alloc(int size)
 	 virtual_grf_array_size *= 2;
       virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
 				   virtual_grf_array_size);
-
-      /* This slot is always unused. */
-      virtual_grf_sizes[0] = 0;
    }
    virtual_grf_sizes[virtual_grf_next] = size;
    return virtual_grf_next++;
@@ -985,7 +982,7 @@ fs_visitor::calculate_live_intervals()
 	 }
       } else {
 	 for (unsigned int i = 0; i < 3; i++) {
-	    if (inst->src[i].file == GRF && inst->src[i].reg != 0) {
+	    if (inst->src[i].file == GRF) {
 	       int reg = inst->src[i].reg;
 
 	       if (!loop_depth) {
@@ -1001,7 +998,7 @@ fs_visitor::calculate_live_intervals()
 	       }
 	    }
 	 }
-	 if (inst->dst.file == GRF && inst->dst.reg != 0) {
+	 if (inst->dst.file == GRF) {
 	    int reg = inst->dst.reg;
 
 	    if (!loop_depth) {
@@ -1715,7 +1712,7 @@ fs_visitor::run()
       if (0) {
 	 /* Debug of register spilling: Go spill everything. */
 	 int virtual_grf_count = virtual_grf_next;
-	 for (int i = 1; i < virtual_grf_count; i++) {
+	 for (int i = 0; i < virtual_grf_count; i++) {
 	    spill_reg(i);
 	 }
       }
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 96e1420038f..0375f672bec 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -402,7 +402,7 @@ public:
       this->base_ir = NULL;
 
       this->virtual_grf_sizes = NULL;
-      this->virtual_grf_next = 1;
+      this->virtual_grf_next = 0;
       this->virtual_grf_array_size = 0;
       this->virtual_grf_def = NULL;
       this->virtual_grf_use = NULL;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index f246ac49660..83dd629aafb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -50,7 +50,7 @@ extern "C" {
 static void
 assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width)
 {
-   if (reg->file == GRF && reg->reg != 0) {
+   if (reg->file == GRF) {
       assert(reg->reg_offset >= 0);
       reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width;
       reg->reg = 0;
@@ -60,20 +60,17 @@ assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width)
 void
 fs_visitor::assign_regs_trivial()
 {
-   int last_grf = 0;
-   int hw_reg_mapping[this->virtual_grf_next];
+   int hw_reg_mapping[this->virtual_grf_next + 1];
    int i;
    int reg_width = c->dispatch_width / 8;
 
-   hw_reg_mapping[0] = 0;
    /* Note that compressed instructions require alignment to 2 registers. */
-   hw_reg_mapping[1] = ALIGN(this->first_non_payload_grf, reg_width);
-   for (i = 2; i < this->virtual_grf_next; i++) {
+   hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width);
+   for (i = 1; i <= this->virtual_grf_next; i++) {
       hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
 			   this->virtual_grf_sizes[i - 1] * reg_width);
    }
-   last_grf = hw_reg_mapping[i - 1] + (this->virtual_grf_sizes[i - 1] *
-				       reg_width);
+   this->grf_used = hw_reg_mapping[this->virtual_grf_next];
 
    foreach_list(node, &this->instructions) {
       fs_inst *inst = (fs_inst *)node;
@@ -83,12 +80,11 @@ fs_visitor::assign_regs_trivial()
       assign_reg(hw_reg_mapping, &inst->src[1], reg_width);
    }
 
-   if (last_grf >= BRW_MAX_GRF) {
+   if (this->grf_used >= BRW_MAX_GRF) {
       fail("Ran out of regs on trivial allocator (%d/%d)\n",
-	   last_grf, BRW_MAX_GRF);
+	   this->grf_used, BRW_MAX_GRF);
    }
 
-   this->grf_used = last_grf + reg_width;
 }
 
 bool
@@ -101,7 +97,7 @@ fs_visitor::assign_regs()
     * for reg_width == 2.
     */
    int reg_width = c->dispatch_width / 8;
-   int hw_reg_mapping[this->virtual_grf_next + 1];
+   int hw_reg_mapping[this->virtual_grf_next];
    int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width);
    int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
    int class_sizes[base_reg_count];
@@ -125,7 +121,7 @@ fs_visitor::assign_regs()
        */
       class_sizes[class_count++] = 2;
    }
-   for (int r = 1; r < this->virtual_grf_next; r++) {
+   for (int r = 0; r < this->virtual_grf_next; r++) {
       int i;
 
       for (i = 0; i < class_count; i++) {
@@ -195,12 +191,8 @@ fs_visitor::assign_regs()
 
    struct ra_graph *g = ra_alloc_interference_graph(regs,
 						    this->virtual_grf_next);
-   /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1
-    * with nodes.
-    */
-   ra_set_node_class(g, 0, classes[0]);
 
-   for (int i = 1; i < this->virtual_grf_next; i++) {
+   for (int i = 0; i < this->virtual_grf_next; i++) {
       for (int c = 0; c < class_count; c++) {
 	 if (class_sizes[c] == this->virtual_grf_sizes[i]) {
 	    if (aligned_pair_class >= 0 &&
@@ -213,7 +205,7 @@ fs_visitor::assign_regs()
 	 }
       }
 
-      for (int j = 1; j < i; j++) {
+      for (int j = 0; j < i; j++) {
 	 if (virtual_grf_interferes(i, j)) {
 	    ra_add_node_interference(g, i, j);
 	 }
@@ -248,8 +240,7 @@ fs_visitor::assign_regs()
     * numbers.
     */
    this->grf_used = first_assigned_grf;
-   hw_reg_mapping[0] = 0; /* unused */
-   for (int i = 1; i < this->virtual_grf_next; i++) {
+   for (int i = 0; i < this->virtual_grf_next; i++) {
       int reg = ra_get_node_reg(g, i);
       int hw_reg = -1;
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 2b769ccbba1..2e3f9be75b4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -142,9 +142,7 @@ fs_visitor::visit(ir_dereference_array *ir)
    this->result.type = brw_type_for_base_type(ir->type);
 
    if (index) {
-      assert(this->result.file == UNIFORM ||
-	     (this->result.file == GRF &&
-	      this->result.reg != 0));
+      assert(this->result.file == UNIFORM || this->result.file == GRF);
       this->result.reg_offset += index->value.i[0] * element_size;
    } else {
       assert(!"FINISHME: non-constant array element");

From 4e10d5825b31d2c58c0af3e29b7fc2eacb2b4709 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 5 May 2011 19:37:10 -0700
Subject: [PATCH 273/600] i965/fs: Simplify the register allocator using a map
 from RA reg to GRF.

It's fewer pointers to track, and when we start caching the register
set, should be algorithmically better in the cache hit case (lookup in
a byte-per-register array, instead of a linear walk through
desctiption of register classes to find how to translate that class).
---
 .../drivers/dri/i965/brw_fs_reg_allocate.cpp  | 77 +++++++++----------
 1 file changed, 37 insertions(+), 40 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 83dd629aafb..42ab66df6d8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -102,7 +102,7 @@ fs_visitor::assign_regs()
    int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
    int class_sizes[base_reg_count];
    int class_count = 0;
-   int aligned_pair_class = -1;
+   int aligned_pairs_class = -1;
 
    calculate_live_intervals();
 
@@ -137,52 +137,59 @@ fs_visitor::assign_regs()
       }
    }
 
+   /* Compute the total number of registers across all classes. */
    int ra_reg_count = 0;
-   int class_base_reg[class_count];
-   int class_reg_count[class_count];
-   int classes[class_count + 1];
-
    for (int i = 0; i < class_count; i++) {
-      class_base_reg[i] = ra_reg_count;
-      class_reg_count[i] = base_reg_count - (class_sizes[i] - 1);
-      ra_reg_count += class_reg_count[i];
+      ra_reg_count += base_reg_count - (class_sizes[i] - 1);
    }
 
    struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
+   uint8_t ra_reg_to_grf[ra_reg_count];
+   int classes[class_count + 1];
+
+   /* Now, add the registers to their classes, and add the conflicts
+    * between them and the base GRF registers (and also each other).
+    */
+   int reg = 0;
+   int pairs_base_reg = 0;
+   int pairs_reg_count = 0;
    for (int i = 0; i < class_count; i++) {
+      int class_reg_count = base_reg_count - (class_sizes[i] - 1);
       classes[i] = ra_alloc_reg_class(regs);
 
-      for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
-	 int class_reg = class_base_reg[i] + i_r;
+      /* Save this off for the aligned pair class at the end. */
+      if (class_sizes[i] == 2) {
+	 pairs_base_reg = reg;
+	 pairs_reg_count = class_reg_count;
+      }
 
-	 ra_class_add_reg(regs, classes[i], class_reg);
+      for (int j = 0; j < class_reg_count; j++) {
+	 ra_class_add_reg(regs, classes[i], reg);
 
-	 for (int base_reg = i_r;
-	      base_reg < i_r + class_sizes[i];
+	 ra_reg_to_grf[reg] = j;
+
+	 for (int base_reg = j;
+	      base_reg < j + class_sizes[i];
 	      base_reg++) {
-	    ra_add_transitive_reg_conflict(regs, base_reg, class_reg);
+	    ra_add_transitive_reg_conflict(regs, base_reg, reg);
 	 }
+
+	 reg++;
       }
    }
+   assert(reg == ra_reg_count);
 
    /* Add a special class for aligned pairs, which we'll put delta_x/y
     * in on gen5 so that we can do PLN.
     */
    if (brw->has_pln && reg_width == 1 && intel->gen < 6) {
-      int reg_count = (base_reg_count - 1) / 2;
-      int unaligned_pair_class = 1;
-      assert(class_sizes[unaligned_pair_class] == 2);
+      aligned_pairs_class = ra_alloc_reg_class(regs);
 
-      aligned_pair_class = class_count;
-      classes[aligned_pair_class] = ra_alloc_reg_class(regs);
-      class_sizes[aligned_pair_class] = 2;
-      class_base_reg[aligned_pair_class] = 0;
-      class_reg_count[aligned_pair_class] = 0;
-      int start = (first_assigned_grf & 1) ? 1 : 0;
-
-      for (int i = 0; i < reg_count; i++) {
-	 ra_class_add_reg(regs, classes[aligned_pair_class],
-			  class_base_reg[unaligned_pair_class] + i * 2 + start);
+      for (int i = 0; i < pairs_reg_count; i++) {
+	 if ((ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) {
+	    ra_class_add_reg(regs, aligned_pairs_class,
+			     pairs_base_reg + i);
+	 }
       }
       class_count++;
    }
@@ -195,9 +202,9 @@ fs_visitor::assign_regs()
    for (int i = 0; i < this->virtual_grf_next; i++) {
       for (int c = 0; c < class_count; c++) {
 	 if (class_sizes[c] == this->virtual_grf_sizes[i]) {
-	    if (aligned_pair_class >= 0 &&
+	    if (aligned_pairs_class >= 0 &&
 		this->delta_x.reg == i) {
-	       ra_set_node_class(g, i, classes[aligned_pair_class]);
+	       ra_set_node_class(g, i, aligned_pairs_class);
 	    } else {
 	       ra_set_node_class(g, i, classes[c]);
 	    }
@@ -242,18 +249,8 @@ fs_visitor::assign_regs()
    this->grf_used = first_assigned_grf;
    for (int i = 0; i < this->virtual_grf_next; i++) {
       int reg = ra_get_node_reg(g, i);
-      int hw_reg = -1;
 
-      for (int c = 0; c < class_count; c++) {
-	 if (reg >= class_base_reg[c] &&
-	     reg < class_base_reg[c] + class_reg_count[c]) {
-	    hw_reg = reg - class_base_reg[c];
-	    break;
-	 }
-      }
-
-      assert(hw_reg >= 0);
-      hw_reg_mapping[i] = first_assigned_grf + hw_reg * reg_width;
+      hw_reg_mapping[i] = first_assigned_grf + ra_reg_to_grf[reg] * reg_width;
       this->grf_used = MAX2(this->grf_used,
 			    hw_reg_mapping[i] + this->virtual_grf_sizes[i] *
 			    reg_width);

From b1f0bffd399f377a19b0541e1d834afad8b9dad0 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 9 May 2011 09:56:18 -0700
Subject: [PATCH 274/600] i965/fs: Factor out the register allocator setup to a
 separate function.

Besides separating out a logical step of the giant register allocator
function, this now communicates a bunch of the allocator information
through entries in brw_context, which will make this code partially
reusable for caching the expensive allocator setup.
---
 src/mesa/drivers/dri/i965/brw_context.h       |  23 +++
 .../drivers/dri/i965/brw_fs_reg_allocate.cpp  | 148 ++++++++++--------
 2 files changed, 105 insertions(+), 66 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 22baf978ad4..cc11d06874d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -748,6 +748,29 @@ struct brw_context
        * Pre-gen6, push constants live in the CURBE.
        */
       uint32_t push_const_offset;
+
+      /** @{ register allocator */
+
+      struct ra_regs *regs;
+
+      /** Array of the ra classes for the unaligned contiguous
+       * register block sizes used.
+       */
+      int *classes;
+
+      /**
+       * Mapping for register-allocated objects in *regs to the first
+       * GRF for that object.
+      */
+      uint8_t *ra_reg_to_grf;
+
+      /**
+       * ra class for the aligned pairs we use for PLN, which doesn't
+       * appear in *classes.
+       */
+      int aligned_pairs_class;
+
+      /** @} */
    } wm;
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 42ab66df6d8..8e44a010576 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -87,6 +87,80 @@ fs_visitor::assign_regs_trivial()
 
 }
 
+static void
+brw_alloc_reg_set_for_classes(struct brw_context *brw,
+			      int *class_sizes,
+			      int class_count,
+			      int reg_width,
+			      int base_reg_count)
+{
+   struct intel_context *intel = &brw->intel;
+
+   /* Compute the total number of registers across all classes. */
+   int ra_reg_count = 0;
+   for (int i = 0; i < class_count; i++) {
+      ra_reg_count += base_reg_count - (class_sizes[i] - 1);
+   }
+
+   ralloc_free(brw->wm.ra_reg_to_grf);
+   brw->wm.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
+   ralloc_free(brw->wm.regs);
+   brw->wm.regs = ra_alloc_reg_set(ra_reg_count);
+   ralloc_free(brw->wm.classes);
+   brw->wm.classes = ralloc_array(brw, int, class_count + 1);
+
+   brw->wm.aligned_pairs_class = -1;
+
+   /* Now, add the registers to their classes, and add the conflicts
+    * between them and the base GRF registers (and also each other).
+    */
+   int reg = 0;
+   int pairs_base_reg = 0;
+   int pairs_reg_count = 0;
+   for (int i = 0; i < class_count; i++) {
+      int class_reg_count = base_reg_count - (class_sizes[i] - 1);
+      brw->wm.classes[i] = ra_alloc_reg_class(brw->wm.regs);
+
+      /* Save this off for the aligned pair class at the end. */
+      if (class_sizes[i] == 2) {
+	 pairs_base_reg = reg;
+	 pairs_reg_count = class_reg_count;
+      }
+
+      for (int j = 0; j < class_reg_count; j++) {
+	 ra_class_add_reg(brw->wm.regs, brw->wm.classes[i], reg);
+
+	 brw->wm.ra_reg_to_grf[reg] = j;
+
+	 for (int base_reg = j;
+	      base_reg < j + class_sizes[i];
+	      base_reg++) {
+	    ra_add_transitive_reg_conflict(brw->wm.regs, base_reg, reg);
+	 }
+
+	 reg++;
+      }
+   }
+   assert(reg == ra_reg_count);
+
+   /* Add a special class for aligned pairs, which we'll put delta_x/y
+    * in on gen5 so that we can do PLN.
+    */
+   if (brw->has_pln && reg_width == 1 && intel->gen < 6) {
+      brw->wm.aligned_pairs_class = ra_alloc_reg_class(brw->wm.regs);
+
+      for (int i = 0; i < pairs_reg_count; i++) {
+	 if ((brw->wm.ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) {
+	    ra_class_add_reg(brw->wm.regs, brw->wm.aligned_pairs_class,
+			     pairs_base_reg + i);
+	 }
+      }
+      class_count++;
+   }
+
+   ra_set_finalize(brw->wm.regs);
+}
+
 bool
 fs_visitor::assign_regs()
 {
@@ -102,7 +176,6 @@ fs_visitor::assign_regs()
    int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
    int class_sizes[base_reg_count];
    int class_count = 0;
-   int aligned_pairs_class = -1;
 
    calculate_live_intervals();
 
@@ -137,76 +210,20 @@ fs_visitor::assign_regs()
       }
    }
 
-   /* Compute the total number of registers across all classes. */
-   int ra_reg_count = 0;
-   for (int i = 0; i < class_count; i++) {
-      ra_reg_count += base_reg_count - (class_sizes[i] - 1);
-   }
+   brw_alloc_reg_set_for_classes(brw, class_sizes, class_count,
+				 reg_width, base_reg_count);
 
-   struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
-   uint8_t ra_reg_to_grf[ra_reg_count];
-   int classes[class_count + 1];
-
-   /* Now, add the registers to their classes, and add the conflicts
-    * between them and the base GRF registers (and also each other).
-    */
-   int reg = 0;
-   int pairs_base_reg = 0;
-   int pairs_reg_count = 0;
-   for (int i = 0; i < class_count; i++) {
-      int class_reg_count = base_reg_count - (class_sizes[i] - 1);
-      classes[i] = ra_alloc_reg_class(regs);
-
-      /* Save this off for the aligned pair class at the end. */
-      if (class_sizes[i] == 2) {
-	 pairs_base_reg = reg;
-	 pairs_reg_count = class_reg_count;
-      }
-
-      for (int j = 0; j < class_reg_count; j++) {
-	 ra_class_add_reg(regs, classes[i], reg);
-
-	 ra_reg_to_grf[reg] = j;
-
-	 for (int base_reg = j;
-	      base_reg < j + class_sizes[i];
-	      base_reg++) {
-	    ra_add_transitive_reg_conflict(regs, base_reg, reg);
-	 }
-
-	 reg++;
-      }
-   }
-   assert(reg == ra_reg_count);
-
-   /* Add a special class for aligned pairs, which we'll put delta_x/y
-    * in on gen5 so that we can do PLN.
-    */
-   if (brw->has_pln && reg_width == 1 && intel->gen < 6) {
-      aligned_pairs_class = ra_alloc_reg_class(regs);
-
-      for (int i = 0; i < pairs_reg_count; i++) {
-	 if ((ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) {
-	    ra_class_add_reg(regs, aligned_pairs_class,
-			     pairs_base_reg + i);
-	 }
-      }
-      class_count++;
-   }
-
-   ra_set_finalize(regs);
-
-   struct ra_graph *g = ra_alloc_interference_graph(regs,
+   struct ra_graph *g = ra_alloc_interference_graph(brw->wm.regs,
 						    this->virtual_grf_next);
 
    for (int i = 0; i < this->virtual_grf_next; i++) {
       for (int c = 0; c < class_count; c++) {
 	 if (class_sizes[c] == this->virtual_grf_sizes[i]) {
-	    if (aligned_pairs_class >= 0 &&
+	    if (brw->wm.aligned_pairs_class >= 0 &&
 		this->delta_x.reg == i) {
-	       ra_set_node_class(g, i, aligned_pairs_class);
+	       ra_set_node_class(g, i, brw->wm.aligned_pairs_class);
 	    } else {
-	       ra_set_node_class(g, i, classes[c]);
+	       ra_set_node_class(g, i, brw->wm.classes[c]);
 	    }
 	    break;
 	 }
@@ -237,7 +254,6 @@ fs_visitor::assign_regs()
 
 
       ralloc_free(g);
-      ralloc_free(regs);
 
       return false;
    }
@@ -250,7 +266,8 @@ fs_visitor::assign_regs()
    for (int i = 0; i < this->virtual_grf_next; i++) {
       int reg = ra_get_node_reg(g, i);
 
-      hw_reg_mapping[i] = first_assigned_grf + ra_reg_to_grf[reg] * reg_width;
+      hw_reg_mapping[i] = (first_assigned_grf +
+			   brw->wm.ra_reg_to_grf[reg] * reg_width);
       this->grf_used = MAX2(this->grf_used,
 			    hw_reg_mapping[i] + this->virtual_grf_sizes[i] *
 			    reg_width);
@@ -265,7 +282,6 @@ fs_visitor::assign_regs()
    }
 
    ralloc_free(g);
-   ralloc_free(regs);
 
    return true;
 }

From c9e81fe14f36933617c862efb15ae09194485eab Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 15 May 2011 09:36:19 -0700
Subject: [PATCH 275/600] i965: Drop the reg/hw_reg distinction.

"reg" was set in only one case, virtual GRFs pre register allocation,
and would be unset and have hw_reg set after allocation.  Since we
never bothered with looking at virtual GRF number after allocation
anyway, just use the same storage and avoid confusion.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp          | 34 +++++++++----------
 src/mesa/drivers/dri/i965/brw_fs.h            | 21 +++++++-----
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp     |  6 ++--
 .../drivers/dri/i965/brw_fs_reg_allocate.cpp  |  4 +--
 .../dri/i965/brw_fs_schedule_instructions.cpp |  8 ++---
 5 files changed, 37 insertions(+), 36 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index d57a67cc4fc..cafb7092ac8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -187,20 +187,20 @@ fs_visitor::virtual_grf_alloc(int size)
 }
 
 /** Fixed HW reg constructor. */
-fs_reg::fs_reg(enum register_file file, int hw_reg)
+fs_reg::fs_reg(enum register_file file, int reg)
 {
    init();
    this->file = file;
-   this->hw_reg = hw_reg;
+   this->reg = reg;
    this->type = BRW_REGISTER_TYPE_F;
 }
 
 /** Fixed HW reg constructor. */
-fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type)
+fs_reg::fs_reg(enum register_file file, int reg, uint32_t type)
 {
    init();
    this->file = file;
-   this->hw_reg = hw_reg;
+   this->reg = reg;
    this->type = type;
 }
 
@@ -636,7 +636,7 @@ fs_visitor::assign_curb_setup()
 
       for (unsigned int i = 0; i < 3; i++) {
 	 if (inst->src[i].file == UNIFORM) {
-	    int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+	    int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
 	    struct brw_reg brw_reg = brw_vec1_grf(c->nr_payload_regs +
 						  constant_nr / 8,
 						  constant_nr % 8);
@@ -810,7 +810,7 @@ fs_visitor::remove_dead_constants()
 	 fs_inst *inst = (fs_inst *)node;
 
 	 for (int i = 0; i < 3; i++) {
-	    int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+	    int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
 
 	    if (inst->src[i].file != UNIFORM)
 	       continue;
@@ -862,13 +862,13 @@ fs_visitor::remove_dead_constants()
       fs_inst *inst = (fs_inst *)node;
 
       for (int i = 0; i < 3; i++) {
-	 int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+	 int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
 
 	 if (inst->src[i].file != UNIFORM)
 	    continue;
 
 	 assert(this->params_remap[constant_nr] != -1);
-	 inst->src[i].hw_reg = this->params_remap[constant_nr];
+	 inst->src[i].reg = this->params_remap[constant_nr];
 	 inst->src[i].reg_offset = 0;
       }
    }
@@ -912,7 +912,7 @@ fs_visitor::setup_pull_constants()
 	 if (inst->src[i].file != UNIFORM)
 	    continue;
 
-	 int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+	 int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset;
 	 if (uniform_nr < pull_uniform_base)
 	    continue;
 
@@ -1374,9 +1374,9 @@ fs_visitor::compute_to_mrf()
       /* Work out which hardware MRF registers are written by this
        * instruction.
        */
-      int mrf_low = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+      int mrf_low = inst->dst.reg & ~BRW_MRF_COMPR4;
       int mrf_high;
-      if (inst->dst.hw_reg & BRW_MRF_COMPR4) {
+      if (inst->dst.reg & BRW_MRF_COMPR4) {
 	 mrf_high = mrf_low + 4;
       } else if (c->dispatch_width == 16 &&
 		 (!inst->force_uncompressed && !inst->force_sechalf)) {
@@ -1443,7 +1443,7 @@ fs_visitor::compute_to_mrf()
 	    if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
 	       /* Found the creator of our MRF's source value. */
 	       scan_inst->dst.file = MRF;
-	       scan_inst->dst.hw_reg = inst->dst.hw_reg;
+	       scan_inst->dst.reg = inst->dst.reg;
 	       scan_inst->saturate |= inst->saturate;
 	       inst->remove();
 	       progress = true;
@@ -1480,10 +1480,10 @@ fs_visitor::compute_to_mrf()
 	    /* If somebody else writes our MRF here, we can't
 	     * compute-to-MRF before that.
 	     */
-	    int scan_mrf_low = scan_inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+	    int scan_mrf_low = scan_inst->dst.reg & ~BRW_MRF_COMPR4;
 	    int scan_mrf_high;
 
-	    if (scan_inst->dst.hw_reg & BRW_MRF_COMPR4) {
+	    if (scan_inst->dst.reg & BRW_MRF_COMPR4) {
 	       scan_mrf_high = scan_mrf_low + 4;
 	    } else if (c->dispatch_width == 16 &&
 		       (!scan_inst->force_uncompressed &&
@@ -1555,7 +1555,7 @@ fs_visitor::remove_duplicate_mrf_writes()
 
       if (inst->opcode == BRW_OPCODE_MOV &&
 	  inst->dst.file == MRF) {
-	 fs_inst *prev_inst = last_mrf_move[inst->dst.hw_reg];
+	 fs_inst *prev_inst = last_mrf_move[inst->dst.reg];
 	 if (prev_inst && inst->equals(prev_inst)) {
 	    inst->remove();
 	    progress = true;
@@ -1565,7 +1565,7 @@ fs_visitor::remove_duplicate_mrf_writes()
 
       /* Clear out the last-write records for MRFs that were overwritten. */
       if (inst->dst.file == MRF) {
-	 last_mrf_move[inst->dst.hw_reg] = NULL;
+	 last_mrf_move[inst->dst.reg] = NULL;
       }
 
       if (inst->mlen > 0) {
@@ -1591,7 +1591,7 @@ fs_visitor::remove_duplicate_mrf_writes()
 	  inst->dst.file == MRF &&
 	  inst->src[0].file == GRF &&
 	  !inst->predicated) {
-	 last_mrf_move[inst->dst.hw_reg] = inst;
+	 last_mrf_move[inst->dst.reg] = inst;
       }
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 0375f672bec..4ec649014de 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -51,7 +51,7 @@ enum register_file {
    MRF = BRW_MESSAGE_REGISTER_FILE,
    IMM = BRW_IMMEDIATE_VALUE,
    FIXED_HW_REG, /* a struct brw_reg */
-   UNIFORM, /* prog_data->params[hw_reg] */
+   UNIFORM, /* prog_data->params[reg] */
    BAD_FILE
 };
 
@@ -99,7 +99,6 @@ public:
    void init()
    {
       memset(this, 0, sizeof(*this));
-      this->hw_reg = -1;
       this->smear = -1;
    }
 
@@ -146,8 +145,8 @@ public:
       this->type = fixed_hw_reg.type;
    }
 
-   fs_reg(enum register_file file, int hw_reg);
-   fs_reg(enum register_file file, int hw_reg, uint32_t type);
+   fs_reg(enum register_file file, int reg);
+   fs_reg(enum register_file file, int reg, uint32_t type);
    fs_reg(class fs_visitor *v, const struct glsl_type *type);
 
    bool equals(fs_reg *r)
@@ -155,7 +154,6 @@ public:
       return (file == r->file &&
 	      reg == r->reg &&
 	      reg_offset == r->reg_offset &&
-	      hw_reg == r->hw_reg &&
 	      type == r->type &&
 	      negate == r->negate &&
 	      abs == r->abs &&
@@ -167,12 +165,17 @@ public:
 
    /** Register file: ARF, GRF, MRF, IMM. */
    enum register_file file;
-   /** virtual register number.  0 = fixed hw reg */
+   /**
+    * Register number.  For ARF/MRF, it's the hardware register.  For
+    * GRF, it's a virtual register number until register allocation
+    */
    int reg;
-   /** Offset within the virtual register. */
+   /**
+    * For virtual registers, this is a hardware register offset from
+    * the start of the register block (for example, a constant index
+    * in an array access).
+    */
    int reg_offset;
-   /** HW register number.  Generally unset until register allocation. */
-   int hw_reg;
    /** Register type.  BRW_REGISTER_TYPE_* */
    int type;
    bool negate;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 9fb0153d1f8..e168e541bef 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -538,11 +538,9 @@ brw_reg_from_fs_reg(fs_reg *reg)
    case ARF:
    case MRF:
       if (reg->smear == -1) {
-	 brw_reg = brw_vec8_reg(reg->file,
-				reg->hw_reg, 0);
+	 brw_reg = brw_vec8_reg(reg->file, reg->reg, 0);
       } else {
-	 brw_reg = brw_vec1_reg(reg->file,
-				reg->hw_reg, reg->smear);
+	 brw_reg = brw_vec1_reg(reg->file, reg->reg, reg->smear);
       }
       brw_reg = retype(brw_reg, reg->type);
       if (reg->sechalf)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 8e44a010576..5c9cba99ae5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -52,8 +52,8 @@ assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width)
 {
    if (reg->file == GRF) {
       assert(reg->reg_offset >= 0);
-      reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width;
-      reg->reg = 0;
+      reg->reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width;
+      reg->reg_offset = 0;
    }
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index 9ec3f502764..f1a88fcfa79 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -321,12 +321,12 @@ instruction_scheduler::calculate_deps()
 	 add_dep(last_grf_write[inst->dst.reg], n);
 	 last_grf_write[inst->dst.reg] = n;
       } else if (inst->dst.file == MRF) {
-	 int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+	 int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
 
 	 add_dep(last_mrf_write[reg], n);
 	 last_mrf_write[reg] = n;
 	 if (is_compressed(inst)) {
-	    if (inst->dst.hw_reg & BRW_MRF_COMPR4)
+	    if (inst->dst.reg & BRW_MRF_COMPR4)
 	       reg += 4;
 	    else
 	       reg++;
@@ -401,12 +401,12 @@ instruction_scheduler::calculate_deps()
       if (inst->dst.file == GRF) {
 	 last_grf_write[inst->dst.reg] = n;
       } else if (inst->dst.file == MRF) {
-	 int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+	 int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
 
 	 last_mrf_write[reg] = n;
 
 	 if (is_compressed(inst)) {
-	    if (inst->dst.hw_reg & BRW_MRF_COMPR4)
+	    if (inst->dst.reg & BRW_MRF_COMPR4)
 	       reg += 4;
 	    else
 	       reg++;

From 09eeb0ff27005c0ffccd5cdbe46862e181a4ee6c Mon Sep 17 00:00:00 2001
From: Carl Simonson <simonsonc@gmail.com>
Date: Wed, 10 Aug 2011 11:10:43 -0700
Subject: [PATCH 276/600] i830: Add missing vtable entry for i830 from the hiz
 work.

---
 src/mesa/drivers/dri/i915/i830_vtbl.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c
index 6d43726beb1..ed5286fd7d9 100644
--- a/src/mesa/drivers/dri/i915/i830_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i830_vtbl.c
@@ -881,6 +881,12 @@ i830_invalidate_state(struct intel_context *intel, GLuint new_state)
       i830_update_provoking_vertex(&intel->ctx);
 }
 
+static bool
+i830_is_hiz_depth_format(struct intel_context *intel, gl_format format)
+{
+   return false;
+}
+
 void
 i830InitVtbl(struct i830_context *i830)
 {
@@ -898,4 +904,5 @@ i830InitVtbl(struct i830_context *i830)
    i830->intel.vtbl.finish_batch = intel_finish_vb;
    i830->intel.vtbl.invalidate_state = i830_invalidate_state;
    i830->intel.vtbl.render_target_supported = i830_render_target_supported;
+   i830->intel.vtbl.is_hiz_depth_format = i830_is_hiz_depth_format;
 }

From df7859be6b6b6c227e7a4e0b7fbfafcd0800f4f8 Mon Sep 17 00:00:00 2001
From: Lauri Kasanen <cand@gmx.com>
Date: Mon, 8 Aug 2011 12:32:13 +0300
Subject: [PATCH 277/600] r600g: Add support for ROUND, v2

This is a GLSL 1.3 feature, but also used by MLAA.

Signed-off-by: Lauri Kasanen <cand@gmx.com>
---
 src/gallium/drivers/r600/r600_asm.c    | 2 ++
 src/gallium/drivers/r600/r600_shader.c | 6 +++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 5fae2b00c8b..24af9917a6f 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -88,6 +88,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r
 		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT:
 		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
 		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
+		case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE:
 			return 1;
 		default: R600_ERR(
 			"Need instruction operand number for 0x%x.\n", alu->inst);
@@ -140,6 +141,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r
 		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT:
 		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
 		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
+		case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE:
 			return 1;
 		default: R600_ERR(
 			"Need instruction operand number for 0x%x.\n", alu->inst);
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index c55cdd707eb..2551aa26f2a 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -3243,7 +3243,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_FRC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
 	{TGSI_OPCODE_CLAMP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_FLR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
-	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_ROUND,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
 	{TGSI_OPCODE_EX2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
 	{TGSI_OPCODE_LG2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
 	{TGSI_OPCODE_POW,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
@@ -3401,7 +3401,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
 	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
-	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
 	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
 	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
 	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
@@ -3559,7 +3559,7 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_FRC,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
 	{TGSI_OPCODE_CLAMP,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_FLR,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
-	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_ROUND,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2},
 	{TGSI_OPCODE_EX2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr},
 	{TGSI_OPCODE_LG2,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr},
 	{TGSI_OPCODE_POW,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow},

From fa351bd2e0aecccd5ed6ef8744d5ba4a6dbf5d2c Mon Sep 17 00:00:00 2001
From: Ben Widawsky <ben@bwidawsk.net>
Date: Sun, 7 Aug 2011 17:04:04 -0700
Subject: [PATCH 278/600] intel: GetBuffer fix

After copy buffer on preGEN6, it is necessary to wait for the blit to
complete before returning data to the user.

This should fix the piglit test: copy_buffer_coherency (pre-GEN6).

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/intel/intel_buffer_objects.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 439d6fc8247..703300b31af 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -282,12 +282,17 @@ intel_bufferobj_get_subdata(struct gl_context * ctx,
                             GLvoid * data, struct gl_buffer_object *obj)
 {
    struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+   struct intel_context *intel = intel_context(ctx);
 
    assert(intel_obj);
    if (intel_obj->sys_buffer)
       memcpy(data, (char *)intel_obj->sys_buffer + offset, size);
-   else
+   else {
+      if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) {
+	 intel_batchbuffer_flush(intel);
+      }
       drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data);
+   }
 }
 
 

From e411cd7b0a54d2f9b9f4cda4918aa7742ed5c2a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20F=C3=A4nger?= <a.faenger@e-sign.com>
Date: Wed, 10 Aug 2011 08:07:29 +0000
Subject: [PATCH 279/600] swrast: initial multi-threaded span rendering

Optional parallel rendering of spans using OpenMP.
Initial implementation for aa triangles. A new option for scons is
also provided to activate the openmp support (off by default).

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 common.py                      |  1 +
 scons/gallium.py               | 12 ++++++
 src/mesa/swrast/s_aatritemp.h  | 72 ++++++++++++++++++++--------------
 src/mesa/swrast/s_context.c    | 26 +++++++++---
 src/mesa/swrast/s_texcombine.c |  4 ++
 src/mesa/tnl/t_pipeline.c      | 12 ++++++
 6 files changed, 91 insertions(+), 36 deletions(-)

diff --git a/common.py b/common.py
index 8657030ea3f..cfee1b5dc2e 100644
--- a/common.py
+++ b/common.py
@@ -88,6 +88,7 @@ def AddOptions(opts):
 	opts.Add('toolchain', 'compiler toolchain', default_toolchain)
 	opts.Add(BoolOption('gles', 'EXPERIMENTAL: enable OpenGL ES support', 'no'))
 	opts.Add(BoolOption('llvm', 'use LLVM', default_llvm))
+	opts.Add(BoolOption('openmp', 'EXPERIMENTAL: compile with openmp (swrast)', 'no'))
 	opts.Add(BoolOption('debug', 'DEPRECATED: debug build', 'yes'))
 	opts.Add(BoolOption('profile', 'DEPRECATED: profile build', 'no'))
 	opts.Add(BoolOption('quiet', 'DEPRECATED: profile build', 'yes'))
diff --git a/scons/gallium.py b/scons/gallium.py
index 8cd3bc7f6e0..7135251d7a3 100755
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -596,6 +596,18 @@ def generate(env):
         libs += ['m', 'pthread', 'dl']
     env.Append(LIBS = libs)
 
+    # OpenMP
+    if env['openmp']:
+        if env['msvc']:
+            env.Append(CCFLAGS = ['/openmp'])
+            # When building openmp release VS2008 link.exe crashes with LNK1103 error.
+            # Workaround: overwrite PDB flags with empty value as it isn't required anyways
+            if env['build'] == 'release':
+                env['PDB'] = ''
+        if env['gcc']:
+            env.Append(CCFLAGS = ['-fopenmp'])
+            env.Append(LIBS = ['gomp'])
+
     # Load tools
     env.Tool('lex')
     env.Tool('yacc')
diff --git a/src/mesa/swrast/s_aatritemp.h b/src/mesa/swrast/s_aatritemp.h
index 91d4f7a10ab..77b3ae6ec7a 100644
--- a/src/mesa/swrast/s_aatritemp.h
+++ b/src/mesa/swrast/s_aatritemp.h
@@ -181,13 +181,20 @@
       const GLfloat *pMax = vMax->attrib[FRAG_ATTRIB_WPOS];
       const GLfloat dxdy = majDx / majDy;
       const GLfloat xAdj = dxdy < 0.0F ? -dxdy : 0.0F;
-      GLfloat x = pMin[0] - (yMin - iyMin) * dxdy;
       GLint iy;
-      for (iy = iyMin; iy < iyMax; iy++, x += dxdy) {
+#ifdef _OPENMP
+#pragma omp parallel for schedule(dynamic) private(iy) firstprivate(span)
+#endif
+      for (iy = iyMin; iy < iyMax; iy++) {
+         GLfloat x = pMin[0] - (yMin - iy) * dxdy;
          GLint ix, startX = (GLint) (x - xAdj);
          GLuint count;
          GLfloat coverage = 0.0F;
 
+#ifdef _OPENMP
+         /* each thread needs to use a different (global) SpanArrays variable */
+         span.array = SWRAST_CONTEXT(ctx)->SpanArrays + omp_get_thread_num();
+#endif
          /* skip over fragments with zero coverage */
          while (startX < MAX_WIDTH) {
             coverage = compute_coveragef(pMin, pMid, pMax, startX, iy);
@@ -228,13 +235,12 @@
             coverage = compute_coveragef(pMin, pMid, pMax, ix, iy);
          }
          
-         if (ix <= startX)
-            continue;
-         
-         span.x = startX;
-         span.y = iy;
-         span.end = (GLuint) ix - (GLuint) startX;
-         _swrast_write_rgba_span(ctx, &span);
+         if (ix > startX) {
+            span.x = startX;
+            span.y = iy;
+            span.end = (GLuint) ix - (GLuint) startX;
+            _swrast_write_rgba_span(ctx, &span);
+         }
       }
    }
    else {
@@ -244,13 +250,20 @@
       const GLfloat *pMax = vMax->attrib[FRAG_ATTRIB_WPOS];
       const GLfloat dxdy = majDx / majDy;
       const GLfloat xAdj = dxdy > 0 ? dxdy : 0.0F;
-      GLfloat x = pMin[0] - (yMin - iyMin) * dxdy;
       GLint iy;
-      for (iy = iyMin; iy < iyMax; iy++, x += dxdy) {
+#ifdef _OPENMP
+#pragma omp parallel for schedule(dynamic) private(iy) firstprivate(span)
+#endif
+      for (iy = iyMin; iy < iyMax; iy++) {
+         GLfloat x = pMin[0] - (yMin - iy) * dxdy;
          GLint ix, left, startX = (GLint) (x + xAdj);
          GLuint count, n;
          GLfloat coverage = 0.0F;
          
+#ifdef _OPENMP
+         /* each thread needs to use a different (global) SpanArrays variable */
+         span.array = SWRAST_CONTEXT(ctx)->SpanArrays + omp_get_thread_num();
+#endif
          /* make sure we're not past the window edge */
          if (startX >= ctx->DrawBuffer->_Xmax) {
             startX = ctx->DrawBuffer->_Xmax - 1;
@@ -296,31 +309,30 @@
          ATTRIB_LOOP_END
 #endif
 
-         if (startX <= ix)
-            continue;
+         if (startX > ix) {
+            n = (GLuint) startX - (GLuint) ix;
 
-         n = (GLuint) startX - (GLuint) ix;
+            left = ix + 1;
 
-         left = ix + 1;
-
-         /* shift all values to the left */
-         /* XXX this is temporary */
-         {
-            SWspanarrays *array = span.array;
-            GLint j;
-            for (j = 0; j < (GLint) n; j++) {
-               array->coverage[j] = array->coverage[j + left];
-               COPY_CHAN4(array->rgba[j], array->rgba[j + left]);
+            /* shift all values to the left */
+            /* XXX this is temporary */
+            {
+               SWspanarrays *array = span.array;
+               GLint j;
+               for (j = 0; j < (GLint) n; j++) {
+                  array->coverage[j] = array->coverage[j + left];
+                  COPY_CHAN4(array->rgba[j], array->rgba[j + left]);
 #ifdef DO_Z
-               array->z[j] = array->z[j + left];
+                  array->z[j] = array->z[j + left];
 #endif
+               }
             }
-         }
 
-         span.x = left;
-         span.y = iy;
-         span.end = n;
-         _swrast_write_rgba_span(ctx, &span);
+            span.x = left;
+            span.y = iy;
+            span.end = n;
+            _swrast_write_rgba_span(ctx, &span);
+         }
       }
    }
 }
diff --git a/src/mesa/swrast/s_context.c b/src/mesa/swrast/s_context.c
index def1531d7ff..4434f11b990 100644
--- a/src/mesa/swrast/s_context.c
+++ b/src/mesa/swrast/s_context.c
@@ -772,6 +772,11 @@ _swrast_CreateContext( struct gl_context *ctx )
 {
    GLuint i;
    SWcontext *swrast = (SWcontext *)CALLOC(sizeof(SWcontext));
+#ifdef _OPENMP
+   const GLint maxThreads = omp_get_max_threads();
+#else
+   const GLint maxThreads = 1;
+#endif
 
    if (SWRAST_DEBUG) {
       _mesa_debug(ctx, "_swrast_CreateContext\n");
@@ -806,19 +811,25 @@ _swrast_CreateContext( struct gl_context *ctx )
    for (i = 0; i < MAX_TEXTURE_IMAGE_UNITS; i++)
       swrast->TextureSample[i] = NULL;
 
-   swrast->SpanArrays = MALLOC_STRUCT(sw_span_arrays);
+   /* SpanArrays is global and shared by all SWspan instances. However, when
+    * using multiple threads, it is necessary to have one SpanArrays instance
+    * per thread.
+    */
+   swrast->SpanArrays = (SWspanarrays *) MALLOC(maxThreads * sizeof(SWspanarrays));
    if (!swrast->SpanArrays) {
       FREE(swrast);
       return GL_FALSE;
    }
-   swrast->SpanArrays->ChanType = CHAN_TYPE;
+   for(i = 0; i < maxThreads; i++) {
+      swrast->SpanArrays[i].ChanType = CHAN_TYPE;
 #if CHAN_TYPE == GL_UNSIGNED_BYTE
-   swrast->SpanArrays->rgba = swrast->SpanArrays->rgba8;
+      swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].rgba8;
 #elif CHAN_TYPE == GL_UNSIGNED_SHORT
-   swrast->SpanArrays->rgba = swrast->SpanArrays->rgba16;
+      swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].rgba16;
 #else
-   swrast->SpanArrays->rgba = swrast->SpanArrays->attribs[FRAG_ATTRIB_COL0];
+      swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].attribs[FRAG_ATTRIB_COL0];
 #endif
+   }
 
    /* init point span buffer */
    swrast->PointSpan.primitive = GL_POINT;
@@ -826,7 +837,10 @@ _swrast_CreateContext( struct gl_context *ctx )
    swrast->PointSpan.facing = 0;
    swrast->PointSpan.array = swrast->SpanArrays;
 
-   swrast->TexelBuffer = (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits *
+   /* TexelBuffer is also global and normally shared by all SWspan instances;
+    * when running with multiple threads, create one per thread.
+    */
+   swrast->TexelBuffer = (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits * maxThreads *
                                            MAX_WIDTH * 4 * sizeof(GLfloat));
    if (!swrast->TexelBuffer) {
       FREE(swrast->SpanArrays);
diff --git a/src/mesa/swrast/s_texcombine.c b/src/mesa/swrast/s_texcombine.c
index 086ed0b33d7..80b9dff3cc2 100644
--- a/src/mesa/swrast/s_texcombine.c
+++ b/src/mesa/swrast/s_texcombine.c
@@ -48,7 +48,11 @@ typedef float (*float4_array)[4];
 static INLINE float4_array
 get_texel_array(SWcontext *swrast, GLuint unit)
 {
+#ifdef _OPENMP
+   return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4 * omp_get_num_threads() + (MAX_WIDTH * 4 * omp_get_thread_num()));
+#else
    return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4);
+#endif
 }
 
 
diff --git a/src/mesa/tnl/t_pipeline.c b/src/mesa/tnl/t_pipeline.c
index 18f095f0d4b..881d5d5f535 100644
--- a/src/mesa/tnl/t_pipeline.c
+++ b/src/mesa/tnl/t_pipeline.c
@@ -146,7 +146,17 @@ void _tnl_run_pipeline( struct gl_context *ctx )
 	 _tnl_notify_pipeline_output_change( ctx );
    }
 
+#ifndef _OPENMP
+   /* Don't adjust FPU precision mode in case multiple threads are to be used.
+    * This would require that the additional threads also changed the FPU mode
+    * which is quite a mess as this had to be done in all parallelized sections;
+    * otherwise the master thread and all other threads are running in different
+    * modes, producing inconsistent results.
+    * Note that all x64 implementations don't define/use START_FAST_MATH, so
+    * this is "hack" is only used in i386 mode
+    */
    START_FAST_MATH(__tmp);
+#endif
 
    for (i = 0; i < tnl->pipeline.nr_stages ; i++) {
       struct tnl_pipeline_stage *s = &tnl->pipeline.stages[i];
@@ -154,7 +164,9 @@ void _tnl_run_pipeline( struct gl_context *ctx )
 	 break;
    }
 
+#ifndef _OPENMP
    END_FAST_MATH(__tmp);
+#endif
 }
 
 

From 37a64baea87c470a68f9b2582af86783eb3509c4 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 11 Aug 2011 08:52:41 -0600
Subject: [PATCH 280/600] swrast: don't try to do depth testing if there's no
 depth buffer

Fixes piglit hiz-depth-stencil-test-fbo-d0-s8 crash.
See http://bugs.freedesktop.org/show_bug.cgi?id=37907

NOTE: This is a candidate for the 7.11 branch.
---
 src/mesa/swrast/s_stencil.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/swrast/s_stencil.c b/src/mesa/swrast/s_stencil.c
index 5bec71c057b..fa5093a3407 100644
--- a/src/mesa/swrast/s_stencil.c
+++ b/src/mesa/swrast/s_stencil.c
@@ -462,7 +462,8 @@ stencil_and_ztest_span(struct gl_context *ctx, SWspan *span, GLuint face)
     * Some fragments passed the stencil test, apply depth test to them
     * and apply Zpass and Zfail stencil ops.
     */
-   if (ctx->Depth.Test == GL_FALSE) {
+   if (ctx->Depth.Test == GL_FALSE ||
+       ctx->DrawBuffer->_DepthBuffer == NULL) {
       /*
        * No depth buffer, just apply zpass stencil function to active pixels.
        */

From 9b8287f8f5398647ced3a52885233d58e548c2b7 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 11 Aug 2011 08:58:08 -0600
Subject: [PATCH 281/600] mesa: fix ColorMask array index in
 _mesa_init_driver_state()

This doesn't really make any difference because all the colormasks
are the same upon context set-up, but it makes more sense.
---
 src/mesa/drivers/common/driverfuncs.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c
index 76630264bf7..70f8727a092 100644
--- a/src/mesa/drivers/common/driverfuncs.c
+++ b/src/mesa/drivers/common/driverfuncs.c
@@ -248,10 +248,10 @@ _mesa_init_driver_state(struct gl_context *ctx)
       GLuint i;
       for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) {
          ctx->Driver.ColorMaskIndexed(ctx, i,
-                                      ctx->Color.ColorMask[0][RCOMP],
-                                      ctx->Color.ColorMask[0][GCOMP],
-                                      ctx->Color.ColorMask[0][BCOMP],
-                                      ctx->Color.ColorMask[0][ACOMP]);
+                                      ctx->Color.ColorMask[i][RCOMP],
+                                      ctx->Color.ColorMask[i][GCOMP],
+                                      ctx->Color.ColorMask[i][BCOMP],
+                                      ctx->Color.ColorMask[i][ACOMP]);
       }
    }
    else {

From 099aad2fb0dba8baff61dc7a6803c6c976c08069 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 11 Aug 2011 09:02:16 -0600
Subject: [PATCH 282/600] mesa: fix initialization of GL_FOG_MODE in
 _mesa_init_driver_state()

---
 src/mesa/drivers/common/driverfuncs.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c
index 70f8727a092..a6174ee2f56 100644
--- a/src/mesa/drivers/common/driverfuncs.c
+++ b/src/mesa/drivers/common/driverfuncs.c
@@ -286,7 +286,10 @@ _mesa_init_driver_state(struct gl_context *ctx)
    ctx->Driver.Enable(ctx, GL_TEXTURE_CUBE_MAP, GL_FALSE);
 
    ctx->Driver.Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color);
-   ctx->Driver.Fogfv(ctx, GL_FOG_MODE, 0);
+   {
+      GLfloat mode = (GLfloat) ctx->Fog.Mode;
+      ctx->Driver.Fogfv(ctx, GL_FOG_MODE, &mode);
+   }
    ctx->Driver.Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density);
    ctx->Driver.Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start);
    ctx->Driver.Fogfv(ctx, GL_FOG_END, &ctx->Fog.End);

From 9cd64ec35acd54cbe0be4d03236d2c5a9d4be6fe Mon Sep 17 00:00:00 2001
From: Chad Versace <chad@chad-versace.us>
Date: Wed, 10 Aug 2011 15:46:14 -0700
Subject: [PATCH 283/600] x86-64: Fix compile error with clang

Remove the 'f' suffix from a float literal.
    - .float 0.0f+1.0
    + .float 1.0

This fixes the following compile error with clang:
    error: unexpected token in directive
    .float 0.0f+1.0
              ^

Note: This is a candidate for the stable branches.
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Chad Versace <chad@chad-versace.us>
---
 src/mesa/x86-64/xform4.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/x86-64/xform4.S b/src/mesa/x86-64/xform4.S
index 6141e434679..5abd5a25de5 100644
--- a/src/mesa/x86-64/xform4.S
+++ b/src/mesa/x86-64/xform4.S
@@ -118,7 +118,7 @@ p4_constants:
 .byte  0x00, 0x00, 0x00, 0x00
 .byte  0x00, 0x00, 0x00, 0x00
 .byte  0x00, 0x00, 0x00, 0x00
-.float 0f+1.0
+.float 1.0
 
 .text
 .align 16

From 3ce243879a72363ca3283fe6ab30381e05a9b4a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
Date: Fri, 12 Aug 2011 11:10:56 +0200
Subject: [PATCH 284/600] gallium/gbm: Add dependencies for libraries linked
 into pipe_*.so.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/targets/gbm/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/targets/gbm/Makefile b/src/gallium/targets/gbm/Makefile
index b38782c4070..faacc89f1a0 100644
--- a/src/gallium/targets/gbm/Makefile
+++ b/src/gallium/targets/gbm/Makefile
@@ -146,7 +146,7 @@ $(GBM_EXTRA_TARGETS): $(TOP)/$(LIB_DIR)/gbm/%: %
 	@$(INSTALL) -d $(dir $@)
 	$(INSTALL) $< $(dir $@)
 
-$(pipe_TARGETS): $(PIPE_PREFIX)%.so: pipe_%.o
+$(pipe_TARGETS): $(PIPE_PREFIX)%.so: pipe_%.o $(pipe_LIBS) $($*_LIBS)
 	$(MKLIB) -o $@ -noprefix -linker '$(CC)' \
 		-ldflags '-L$(TOP)/$(LIB_DIR) $(pipe_LDFLAGS) $(LDFLAGS)' \
 		$(MKLIB_OPTIONS) $< \

From 281947b3511f606df365e0985631f31d237e63f1 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Sun, 7 Aug 2011 16:58:29 +0900
Subject: [PATCH 285/600] glapi: add gles_api.py

Move the list of entry points belong to GLES from mapi_abi.py to a new
file.

Until we figure out how to describe the APIs an entry point belongs to
in the XML file, and how to handle the case where an entry point others
alias is missing in some APIs, this is an easier solution than
maintaining another two sets of XMLs in glapi/gen-es/.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mapi/glapi/gen/gl_and_es_API.xml |   5 +
 src/mapi/glapi/gen/gles_api.py       | 452 +++++++++++++++++++++++++++
 src/mapi/mapi/mapi_abi.py            | 424 +------------------------
 3 files changed, 464 insertions(+), 417 deletions(-)
 create mode 100644 src/mapi/glapi/gen/gles_api.py

diff --git a/src/mapi/glapi/gen/gl_and_es_API.xml b/src/mapi/glapi/gen/gl_and_es_API.xml
index ac7d43ceda7..1313da0f5d6 100644
--- a/src/mapi/glapi/gen/gl_and_es_API.xml
+++ b/src/mapi/glapi/gen/gl_and_es_API.xml
@@ -3,6 +3,11 @@
 
 <!-- OpenGL + OpenGL ES -->
 
+<!-- IMPORTANT
+     Remember to update gles_api.py when new OpenGL ES specific entry points
+     are added.  Otherwise, they will be filtered out.
+-->
+
 <OpenGLAPI>
 
 <xi:include href="gl_API.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
diff --git a/src/mapi/glapi/gen/gles_api.py b/src/mapi/glapi/gen/gles_api.py
new file mode 100644
index 00000000000..4cde9e544d5
--- /dev/null
+++ b/src/mapi/glapi/gen/gles_api.py
@@ -0,0 +1,452 @@
+#!/usr/bin/env python
+
+# Mesa 3-D graphics library
+# Version:  7.12
+#
+# Copyright (C) 2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+#
+# Authors:
+#    Chia-I Wu <olv@lunarg.com>
+
+# These info should be part of GLAPI XML.  Until that is possible, scripts have
+# to use tables here to filter gl_api.
+
+es1_core = (
+        # OpenGL ES 1.1
+        'ActiveTexture',
+        'AlphaFunc',
+        'AlphaFuncx',
+        'BindBuffer',
+        'BindTexture',
+        'BlendFunc',
+        'BufferData',
+        'BufferSubData',
+        'Clear',
+        'ClearColor',
+        'ClearColorx',
+        'ClearDepthf',
+        'ClearDepthx',
+        'ClearStencil',
+        'ClientActiveTexture',
+        'ClipPlanef',
+        'ClipPlanex',
+        'Color4f',
+        'Color4ub',
+        'Color4x',
+        'ColorMask',
+        'ColorPointer',
+        'CompressedTexImage2D',
+        'CompressedTexSubImage2D',
+        'CopyTexImage2D',
+        'CopyTexSubImage2D',
+        'CullFace',
+        'DeleteBuffers',
+        'DeleteTextures',
+        'DepthFunc',
+        'DepthMask',
+        'DepthRangef',
+        'DepthRangex',
+        'Disable',
+        'DisableClientState',
+        'DrawArrays',
+        'DrawElements',
+        'Enable',
+        'EnableClientState',
+        'Finish',
+        'Flush',
+        'Fogf',
+        'Fogfv',
+        'Fogx',
+        'Fogxv',
+        'FrontFace',
+        'Frustumf',
+        'Frustumx',
+        'GenBuffers',
+        'GenTextures',
+        'GetBooleanv',
+        'GetBufferParameteriv',
+        'GetClipPlanef',
+        'GetClipPlanex',
+        'GetError',
+        'GetFixedv',
+        'GetFloatv',
+        'GetIntegerv',
+        'GetLightfv',
+        'GetLightxv',
+        'GetMaterialfv',
+        'GetMaterialxv',
+        'GetPointerv',
+        'GetString',
+        'GetTexEnvfv',
+        'GetTexEnviv',
+        'GetTexEnvxv',
+        'GetTexParameterfv',
+        'GetTexParameteriv',
+        'GetTexParameterxv',
+        'Hint',
+        'IsBuffer',
+        'IsEnabled',
+        'IsTexture',
+        'Lightf',
+        'Lightfv',
+        'LightModelf',
+        'LightModelfv',
+        'LightModelx',
+        'LightModelxv',
+        'Lightx',
+        'Lightxv',
+        'LineWidth',
+        'LineWidthx',
+        'LoadIdentity',
+        'LoadMatrixf',
+        'LoadMatrixx',
+        'LogicOp',
+        'Materialf',
+        'Materialfv',
+        'Materialx',
+        'Materialxv',
+        'MatrixMode',
+        'MultiTexCoord4f',
+        'MultiTexCoord4x',
+        'MultMatrixf',
+        'MultMatrixx',
+        'Normal3f',
+        'Normal3x',
+        'NormalPointer',
+        'Orthof',
+        'Orthox',
+        'PixelStorei',
+        'PointParameterf',
+        'PointParameterfv',
+        'PointParameterx',
+        'PointParameterxv',
+        'PointSize',
+        'PointSizex',
+        'PolygonOffset',
+        'PolygonOffsetx',
+        'PopMatrix',
+        'PushMatrix',
+        'ReadPixels',
+        'Rotatef',
+        'Rotatex',
+        'SampleCoverage',
+        'SampleCoveragex',
+        'Scalef',
+        'Scalex',
+        'Scissor',
+        'ShadeModel',
+        'StencilFunc',
+        'StencilMask',
+        'StencilOp',
+        'TexCoordPointer',
+        'TexEnvf',
+        'TexEnvfv',
+        'TexEnvi',
+        'TexEnviv',
+        'TexEnvx',
+        'TexEnvxv',
+        'TexImage2D',
+        'TexParameterf',
+        'TexParameterfv',
+        'TexParameteri',
+        'TexParameteriv',
+        'TexParameterx',
+        'TexParameterxv',
+        'TexSubImage2D',
+        'Translatef',
+        'Translatex',
+        'VertexPointer',
+        'Viewport',
+)
+
+es1_api = es1_core + (
+        # GL_OES_EGL_image
+        'EGLImageTargetTexture2DOES',
+        'EGLImageTargetRenderbufferStorageOES',
+        # GL_OES_mapbuffer
+        'GetBufferPointervOES',
+        'MapBufferOES',
+        'UnmapBufferOES',
+        # GL_EXT_multi_draw_arrays
+        'MultiDrawArraysEXT',
+        'MultiDrawElementsEXT',
+        # GL_OES_blend_equation_separate
+        'BlendEquationSeparateOES',
+        # GL_OES_blend_func_separate
+        'BlendFuncSeparateOES',
+        # GL_OES_blend_subtract
+        'BlendEquationOES',
+        # GL_OES_draw_texture
+        'DrawTexiOES',
+        'DrawTexivOES',
+        'DrawTexfOES',
+        'DrawTexfvOES',
+        'DrawTexsOES',
+        'DrawTexsvOES',
+        'DrawTexxOES',
+        'DrawTexxvOES',
+        # GL_OES_fixed_point
+        'AlphaFuncxOES',
+        'ClearColorxOES',
+        'ClearDepthxOES',
+        'Color4xOES',
+        'DepthRangexOES',
+        'FogxOES',
+        'FogxvOES',
+        'FrustumxOES',
+        'LightModelxOES',
+        'LightModelxvOES',
+        'LightxOES',
+        'LightxvOES',
+        'LineWidthxOES',
+        'LoadMatrixxOES',
+        'MaterialxOES',
+        'MaterialxvOES',
+        'MultiTexCoord4xOES',
+        'MultMatrixxOES',
+        'Normal3xOES',
+        'OrthoxOES',
+        'PointSizexOES',
+        'PolygonOffsetxOES',
+        'RotatexOES',
+        'SampleCoveragexOES',
+        'ScalexOES',
+        'TexEnvxOES',
+        'TexEnvxvOES',
+        'TexParameterxOES',
+        'TranslatexOES',
+        'ClipPlanexOES',
+        'GetClipPlanexOES',
+        'GetFixedvOES',
+        'GetLightxvOES',
+        'GetMaterialxvOES',
+        'GetTexEnvxvOES',
+        'GetTexParameterxvOES',
+        'PointParameterxOES',
+        'PointParameterxvOES',
+        'TexParameterxvOES',
+        # GL_OES_framebuffer_object
+        'BindFramebufferOES',
+        'BindRenderbufferOES',
+        'CheckFramebufferStatusOES',
+        'DeleteFramebuffersOES',
+        'DeleteRenderbuffersOES',
+        'FramebufferRenderbufferOES',
+        'FramebufferTexture2DOES',
+        'GenerateMipmapOES',
+        'GenFramebuffersOES',
+        'GenRenderbuffersOES',
+        'GetFramebufferAttachmentParameterivOES',
+        'GetRenderbufferParameterivOES',
+        'IsFramebufferOES',
+        'IsRenderbufferOES',
+        'RenderbufferStorageOES',
+        # GL_OES_point_size_array
+        'PointSizePointerOES',
+        # GL_OES_query_matrix
+        'QueryMatrixxOES',
+        # GL_OES_single_precision
+        'ClearDepthfOES',
+        'DepthRangefOES',
+        'FrustumfOES',
+        'OrthofOES',
+        'ClipPlanefOES',
+        'GetClipPlanefOES',
+        # GL_OES_texture_cube_map
+        'GetTexGenfvOES',
+        'GetTexGenivOES',
+        'GetTexGenxvOES',
+        'TexGenfOES',
+        'TexGenfvOES',
+        'TexGeniOES',
+        'TexGenivOES',
+        'TexGenxOES',
+        'TexGenxvOES',
+)
+
+es2_core = (
+        # OpenGL ES 2.0
+        "ActiveTexture",
+        "AttachShader",
+        "BindAttribLocation",
+        "BindBuffer",
+        "BindFramebuffer",
+        "BindRenderbuffer",
+        "BindTexture",
+        "BlendColor",
+        "BlendEquation",
+        "BlendEquationSeparate",
+        "BlendFunc",
+        "BlendFuncSeparate",
+        "BufferData",
+        "BufferSubData",
+        "CheckFramebufferStatus",
+        "Clear",
+        "ClearColor",
+        "ClearDepthf",
+        "ClearStencil",
+        "ColorMask",
+        "CompileShader",
+        "CompressedTexImage2D",
+        "CompressedTexSubImage2D",
+        "CopyTexImage2D",
+        "CopyTexSubImage2D",
+        "CreateProgram",
+        "CreateShader",
+        "CullFace",
+        "DeleteBuffers",
+        "DeleteFramebuffers",
+        "DeleteProgram",
+        "DeleteRenderbuffers",
+        "DeleteShader",
+        "DeleteTextures",
+        "DepthFunc",
+        "DepthMask",
+        "DepthRangef",
+        "DetachShader",
+        "Disable",
+        "DisableVertexAttribArray",
+        "DrawArrays",
+        "DrawElements",
+        "Enable",
+        "EnableVertexAttribArray",
+        "Finish",
+        "Flush",
+        "FramebufferRenderbuffer",
+        "FramebufferTexture2D",
+        "FrontFace",
+        "GenBuffers",
+        "GenerateMipmap",
+        "GenFramebuffers",
+        "GenRenderbuffers",
+        "GenTextures",
+        "GetActiveAttrib",
+        "GetActiveUniform",
+        "GetAttachedShaders",
+        "GetAttribLocation",
+        "GetBooleanv",
+        "GetBufferParameteriv",
+        "GetError",
+        "GetFloatv",
+        "GetFramebufferAttachmentParameteriv",
+        "GetIntegerv",
+        "GetProgramInfoLog",
+        "GetProgramiv",
+        "GetRenderbufferParameteriv",
+        "GetShaderInfoLog",
+        "GetShaderiv",
+        "GetShaderPrecisionFormat",
+        "GetShaderSource",
+        "GetString",
+        "GetTexParameterfv",
+        "GetTexParameteriv",
+        "GetUniformfv",
+        "GetUniformiv",
+        "GetUniformLocation",
+        "GetVertexAttribfv",
+        "GetVertexAttribiv",
+        "GetVertexAttribPointerv",
+        "Hint",
+        "IsBuffer",
+        "IsEnabled",
+        "IsFramebuffer",
+        "IsProgram",
+        "IsRenderbuffer",
+        "IsShader",
+        "IsTexture",
+        "LineWidth",
+        "LinkProgram",
+        "PixelStorei",
+        "PolygonOffset",
+        "ReadPixels",
+        "ReleaseShaderCompiler",
+        "RenderbufferStorage",
+        "SampleCoverage",
+        "Scissor",
+        "ShaderBinary",
+        "ShaderSource",
+        "StencilFunc",
+        "StencilFuncSeparate",
+        "StencilMask",
+        "StencilMaskSeparate",
+        "StencilOp",
+        "StencilOpSeparate",
+        "TexImage2D",
+        "TexParameterf",
+        "TexParameterfv",
+        "TexParameteri",
+        "TexParameteriv",
+        "TexSubImage2D",
+        "Uniform1f",
+        "Uniform1fv",
+        "Uniform1i",
+        "Uniform1iv",
+        "Uniform2f",
+        "Uniform2fv",
+        "Uniform2i",
+        "Uniform2iv",
+        "Uniform3f",
+        "Uniform3fv",
+        "Uniform3i",
+        "Uniform3iv",
+        "Uniform4f",
+        "Uniform4fv",
+        "Uniform4i",
+        "Uniform4iv",
+        "UniformMatrix2fv",
+        "UniformMatrix3fv",
+        "UniformMatrix4fv",
+        "UseProgram",
+        "ValidateProgram",
+        "VertexAttrib1f",
+        "VertexAttrib1fv",
+        "VertexAttrib2f",
+        "VertexAttrib2fv",
+        "VertexAttrib3f",
+        "VertexAttrib3fv",
+        "VertexAttrib4f",
+        "VertexAttrib4fv",
+        "VertexAttribPointer",
+        "Viewport",
+)
+
+es2_api = es2_core + (
+        # GL_OES_EGL_image
+        'EGLImageTargetTexture2DOES',
+        'EGLImageTargetRenderbufferStorageOES',
+        # GL_OES_mapbuffer
+        'GetBufferPointervOES',
+        'MapBufferOES',
+        'UnmapBufferOES',
+        # GL_EXT_multi_draw_arrays
+        'MultiDrawArraysEXT',
+        'MultiDrawElementsEXT',
+        # GL_OES_texture_3D
+        'CompressedTexImage3DOES',
+        'CompressedTexSubImage3DOES',
+        'CopyTexSubImage3DOES',
+        'FramebufferTexture3DOES',
+        'TexImage3DOES',
+        'TexSubImage3DOES',
+        # GL_OES_get_program_binary
+        'GetProgramBinaryOES',
+        'ProgramBinaryOES',
+)
diff --git a/src/mapi/mapi/mapi_abi.py b/src/mapi/mapi/mapi_abi.py
index cb9fc0ef841..e3d3f6518ec 100644
--- a/src/mapi/mapi/mapi_abi.py
+++ b/src/mapi/mapi/mapi_abi.py
@@ -27,6 +27,11 @@
 #    Chia-I Wu <olv@lunarg.com>
 
 import sys
+# make it possible to import glapi
+import os
+GLAPI = "./%s/../glapi/gen" % (os.path.dirname(sys.argv[0]))
+sys.path.append(GLAPI)
+
 import re
 from optparse import OptionParser
 
@@ -128,9 +133,6 @@ class ABIEntry(object):
 
 def abi_parse_xml(xml):
     """Parse a GLAPI XML file for ABI entries."""
-    import os
-    GLAPI = "./%s/../glapi/gen" % (os.path.dirname(sys.argv[0]))
-    sys.path.append(GLAPI)
     import gl_XML, glX_XML
 
     api = gl_XML.parse_GL_API(xml, glX_XML.glx_item_factory())
@@ -749,255 +751,7 @@ class ES1APIPrinter(GLAPIPrinter):
     """OpenGL ES 1.x API Printer"""
 
     def __init__(self, entries):
-        es1_api = [
-                # OpenGL ES 1.1
-                'ActiveTexture',
-                'AlphaFunc',
-                'AlphaFuncx',
-                'BindBuffer',
-                'BindTexture',
-                'BlendFunc',
-                'BufferData',
-                'BufferSubData',
-                'Clear',
-                'ClearColor',
-                'ClearColorx',
-                'ClearDepthf',
-                'ClearDepthx',
-                'ClearStencil',
-                'ClientActiveTexture',
-                'ClipPlanef',
-                'ClipPlanex',
-                'Color4f',
-                'Color4ub',
-                'Color4x',
-                'ColorMask',
-                'ColorPointer',
-                'CompressedTexImage2D',
-                'CompressedTexSubImage2D',
-                'CopyTexImage2D',
-                'CopyTexSubImage2D',
-                'CullFace',
-                'DeleteBuffers',
-                'DeleteTextures',
-                'DepthFunc',
-                'DepthMask',
-                'DepthRangef',
-                'DepthRangex',
-                'Disable',
-                'DisableClientState',
-                'DrawArrays',
-                'DrawElements',
-                'Enable',
-                'EnableClientState',
-                'Finish',
-                'Flush',
-                'Fogf',
-                'Fogfv',
-                'Fogx',
-                'Fogxv',
-                'FrontFace',
-                'Frustumf',
-                'Frustumx',
-                'GenBuffers',
-                'GenTextures',
-                'GetBooleanv',
-                'GetBufferParameteriv',
-                'GetClipPlanef',
-                'GetClipPlanex',
-                'GetError',
-                'GetFixedv',
-                'GetFloatv',
-                'GetIntegerv',
-                'GetLightfv',
-                'GetLightxv',
-                'GetMaterialfv',
-                'GetMaterialxv',
-                'GetPointerv',
-                'GetString',
-                'GetTexEnvfv',
-                'GetTexEnviv',
-                'GetTexEnvxv',
-                'GetTexParameterfv',
-                'GetTexParameteriv',
-                'GetTexParameterxv',
-                'Hint',
-                'IsBuffer',
-                'IsEnabled',
-                'IsTexture',
-                'Lightf',
-                'Lightfv',
-                'LightModelf',
-                'LightModelfv',
-                'LightModelx',
-                'LightModelxv',
-                'Lightx',
-                'Lightxv',
-                'LineWidth',
-                'LineWidthx',
-                'LoadIdentity',
-                'LoadMatrixf',
-                'LoadMatrixx',
-                'LogicOp',
-                'Materialf',
-                'Materialfv',
-                'Materialx',
-                'Materialxv',
-                'MatrixMode',
-                'MultiTexCoord4f',
-                'MultiTexCoord4x',
-                'MultMatrixf',
-                'MultMatrixx',
-                'Normal3f',
-                'Normal3x',
-                'NormalPointer',
-                'Orthof',
-                'Orthox',
-                'PixelStorei',
-                'PointParameterf',
-                'PointParameterfv',
-                'PointParameterx',
-                'PointParameterxv',
-                'PointSize',
-                'PointSizex',
-                'PolygonOffset',
-                'PolygonOffsetx',
-                'PopMatrix',
-                'PushMatrix',
-                'ReadPixels',
-                'Rotatef',
-                'Rotatex',
-                'SampleCoverage',
-                'SampleCoveragex',
-                'Scalef',
-                'Scalex',
-                'Scissor',
-                'ShadeModel',
-                'StencilFunc',
-                'StencilMask',
-                'StencilOp',
-                'TexCoordPointer',
-                'TexEnvf',
-                'TexEnvfv',
-                'TexEnvi',
-                'TexEnviv',
-                'TexEnvx',
-                'TexEnvxv',
-                'TexImage2D',
-                'TexParameterf',
-                'TexParameterfv',
-                'TexParameteri',
-                'TexParameteriv',
-                'TexParameterx',
-                'TexParameterxv',
-                'TexSubImage2D',
-                'Translatef',
-                'Translatex',
-                'VertexPointer',
-                'Viewport',
-                # GL_OES_EGL_image
-                'EGLImageTargetTexture2DOES',
-                'EGLImageTargetRenderbufferStorageOES',
-                # GL_OES_mapbuffer
-                'GetBufferPointervOES',
-                'MapBufferOES',
-                'UnmapBufferOES',
-                # GL_EXT_multi_draw_arrays
-                'MultiDrawArraysEXT',
-                'MultiDrawElementsEXT',
-                # GL_OES_blend_equation_separate
-                'BlendEquationSeparateOES',
-                # GL_OES_blend_func_separate
-                'BlendFuncSeparateOES',
-                # GL_OES_blend_subtract
-                'BlendEquationOES',
-                # GL_OES_draw_texture
-                'DrawTexiOES',
-                'DrawTexivOES',
-                'DrawTexfOES',
-                'DrawTexfvOES',
-                'DrawTexsOES',
-                'DrawTexsvOES',
-                'DrawTexxOES',
-                'DrawTexxvOES',
-                # GL_OES_fixed_point
-                'AlphaFuncxOES',
-                'ClearColorxOES',
-                'ClearDepthxOES',
-                'Color4xOES',
-                'DepthRangexOES',
-                'FogxOES',
-                'FogxvOES',
-                'FrustumxOES',
-                'LightModelxOES',
-                'LightModelxvOES',
-                'LightxOES',
-                'LightxvOES',
-                'LineWidthxOES',
-                'LoadMatrixxOES',
-                'MaterialxOES',
-                'MaterialxvOES',
-                'MultiTexCoord4xOES',
-                'MultMatrixxOES',
-                'Normal3xOES',
-                'OrthoxOES',
-                'PointSizexOES',
-                'PolygonOffsetxOES',
-                'RotatexOES',
-                'SampleCoveragexOES',
-                'ScalexOES',
-                'TexEnvxOES',
-                'TexEnvxvOES',
-                'TexParameterxOES',
-                'TranslatexOES',
-                'ClipPlanexOES',
-                'GetClipPlanexOES',
-                'GetFixedvOES',
-                'GetLightxvOES',
-                'GetMaterialxvOES',
-                'GetTexEnvxvOES',
-                'GetTexParameterxvOES',
-                'PointParameterxOES',
-                'PointParameterxvOES',
-                'TexParameterxvOES',
-                # GL_OES_framebuffer_object
-                'BindFramebufferOES',
-                'BindRenderbufferOES',
-                'CheckFramebufferStatusOES',
-                'DeleteFramebuffersOES',
-                'DeleteRenderbuffersOES',
-                'FramebufferRenderbufferOES',
-                'FramebufferTexture2DOES',
-                'GenerateMipmapOES',
-                'GenFramebuffersOES',
-                'GenRenderbuffersOES',
-                'GetFramebufferAttachmentParameterivOES',
-                'GetRenderbufferParameterivOES',
-                'IsFramebufferOES',
-                'IsRenderbufferOES',
-                'RenderbufferStorageOES',
-                # GL_OES_point_size_array
-                'PointSizePointerOES',
-                # GL_OES_query_matrix
-                'QueryMatrixxOES',
-                # GL_OES_single_precision
-                'ClearDepthfOES',
-                'DepthRangefOES',
-                'FrustumfOES',
-                'OrthofOES',
-                'ClipPlanefOES',
-                'GetClipPlanefOES',
-                # GL_OES_texture_cube_map
-                'GetTexGenfvOES',
-                'GetTexGenivOES',
-                'GetTexGenxvOES',
-                'TexGenfOES',
-                'TexGenfvOES',
-                'TexGeniOES',
-                'TexGenivOES',
-                'TexGenxOES',
-                'TexGenxvOES',
-        ]
+        from gles_api import es1_api
 
         super(ES1APIPrinter, self).__init__(entries, es1_api)
         self.prefix_lib = 'gl'
@@ -1016,171 +770,7 @@ class ES2APIPrinter(GLAPIPrinter):
     """OpenGL ES 2.x API Printer"""
 
     def __init__(self, entries):
-        es2_api = [
-                # OpenGL ES 2.0
-                "ActiveTexture",
-                "AttachShader",
-                "BindAttribLocation",
-                "BindBuffer",
-                "BindFramebuffer",
-                "BindRenderbuffer",
-                "BindTexture",
-                "BlendColor",
-                "BlendEquation",
-                "BlendEquationSeparate",
-                "BlendFunc",
-                "BlendFuncSeparate",
-                "BufferData",
-                "BufferSubData",
-                "CheckFramebufferStatus",
-                "Clear",
-                "ClearColor",
-                "ClearDepthf",
-                "ClearStencil",
-                "ColorMask",
-                "CompileShader",
-                "CompressedTexImage2D",
-                "CompressedTexSubImage2D",
-                "CopyTexImage2D",
-                "CopyTexSubImage2D",
-                "CreateProgram",
-                "CreateShader",
-                "CullFace",
-                "DeleteBuffers",
-                "DeleteFramebuffers",
-                "DeleteProgram",
-                "DeleteRenderbuffers",
-                "DeleteShader",
-                "DeleteTextures",
-                "DepthFunc",
-                "DepthMask",
-                "DepthRangef",
-                "DetachShader",
-                "Disable",
-                "DisableVertexAttribArray",
-                "DrawArrays",
-                "DrawElements",
-                "Enable",
-                "EnableVertexAttribArray",
-                "Finish",
-                "Flush",
-                "FramebufferRenderbuffer",
-                "FramebufferTexture2D",
-                "FrontFace",
-                "GenBuffers",
-                "GenerateMipmap",
-                "GenFramebuffers",
-                "GenRenderbuffers",
-                "GenTextures",
-                "GetActiveAttrib",
-                "GetActiveUniform",
-                "GetAttachedShaders",
-                "GetAttribLocation",
-                "GetBooleanv",
-                "GetBufferParameteriv",
-                "GetError",
-                "GetFloatv",
-                "GetFramebufferAttachmentParameteriv",
-                "GetIntegerv",
-                "GetProgramInfoLog",
-                "GetProgramiv",
-                "GetRenderbufferParameteriv",
-                "GetShaderInfoLog",
-                "GetShaderiv",
-                "GetShaderPrecisionFormat",
-                "GetShaderSource",
-                "GetString",
-                "GetTexParameterfv",
-                "GetTexParameteriv",
-                "GetUniformfv",
-                "GetUniformiv",
-                "GetUniformLocation",
-                "GetVertexAttribfv",
-                "GetVertexAttribiv",
-                "GetVertexAttribPointerv",
-                "Hint",
-                "IsBuffer",
-                "IsEnabled",
-                "IsFramebuffer",
-                "IsProgram",
-                "IsRenderbuffer",
-                "IsShader",
-                "IsTexture",
-                "LineWidth",
-                "LinkProgram",
-                "PixelStorei",
-                "PolygonOffset",
-                "ReadPixels",
-                "ReleaseShaderCompiler",
-                "RenderbufferStorage",
-                "SampleCoverage",
-                "Scissor",
-                "ShaderBinary",
-                "ShaderSource",
-                "StencilFunc",
-                "StencilFuncSeparate",
-                "StencilMask",
-                "StencilMaskSeparate",
-                "StencilOp",
-                "StencilOpSeparate",
-                "TexImage2D",
-                "TexParameterf",
-                "TexParameterfv",
-                "TexParameteri",
-                "TexParameteriv",
-                "TexSubImage2D",
-                "Uniform1f",
-                "Uniform1fv",
-                "Uniform1i",
-                "Uniform1iv",
-                "Uniform2f",
-                "Uniform2fv",
-                "Uniform2i",
-                "Uniform2iv",
-                "Uniform3f",
-                "Uniform3fv",
-                "Uniform3i",
-                "Uniform3iv",
-                "Uniform4f",
-                "Uniform4fv",
-                "Uniform4i",
-                "Uniform4iv",
-                "UniformMatrix2fv",
-                "UniformMatrix3fv",
-                "UniformMatrix4fv",
-                "UseProgram",
-                "ValidateProgram",
-                "VertexAttrib1f",
-                "VertexAttrib1fv",
-                "VertexAttrib2f",
-                "VertexAttrib2fv",
-                "VertexAttrib3f",
-                "VertexAttrib3fv",
-                "VertexAttrib4f",
-                "VertexAttrib4fv",
-                "VertexAttribPointer",
-                "Viewport",
-                # GL_OES_EGL_image
-                'EGLImageTargetTexture2DOES',
-                'EGLImageTargetRenderbufferStorageOES',
-                # GL_OES_mapbuffer
-                'GetBufferPointervOES',
-                'MapBufferOES',
-                'UnmapBufferOES',
-                # GL_EXT_multi_draw_arrays
-                'MultiDrawArraysEXT',
-                'MultiDrawElementsEXT',
-                # GL_OES_texture_3D
-                'CompressedTexImage3DOES',
-                'CompressedTexSubImage3DOES',
-                'CopyTexSubImage3DOES',
-                'FramebufferTexture3DOES',
-                'TexImage3DOES',
-                'TexSubImage3DOES',
-                # GL_OES_get_program_binary
-                'GetProgramBinaryOES',
-                'ProgramBinaryOES',
-        ]
+        from gles_api import es2_api
 
         super(ES2APIPrinter, self).__init__(entries, es2_api)
         self.prefix_lib = 'gl'

From b8202b3d44b18a3db281c64d1ca01e851ae6deb1 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Sun, 7 Aug 2011 23:19:51 +0900
Subject: [PATCH 286/600] glapi: add methods to filter functions

add gl_api::filter_functions and gl_function::filter_entry_points to
filter out unwanted functions and entry points.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mapi/glapi/gen/gl_XML.py | 46 ++++++++++++++++++++++++++++++------
 1 file changed, 39 insertions(+), 7 deletions(-)

diff --git a/src/mapi/glapi/gen/gl_XML.py b/src/mapi/glapi/gen/gl_XML.py
index 4d414e8b0f8..4dc2e8fa7fb 100644
--- a/src/mapi/glapi/gen/gl_XML.py
+++ b/src/mapi/glapi/gen/gl_XML.py
@@ -618,7 +618,7 @@ class gl_function( gl_item ):
 		# for each entry-point.  Otherwise, they may generate code
 		# that won't compile.
 
-		self.parameter_strings = {}
+		self.entry_point_parameters = {}
 
 		self.process_element( element )
 
@@ -703,12 +703,34 @@ class gl_function( gl_item ):
 
 		if element.children:
 			self.initialized = 1
-			self.parameter_strings[name] = create_parameter_string(parameters, 1)
+			self.entry_point_parameters[name] = parameters
 		else:
-			self.parameter_strings[name] = None
+			self.entry_point_parameters[name] = []
 
 		return
 
+	def filter_entry_points(self, entry_point_list):
+		"""Filter out entry points not in entry_point_list."""
+		if not self.initialized:
+			raise RuntimeError('%s is not initialized yet' % self.name)
+
+		entry_points = []
+		for ent in self.entry_points:
+			if ent not in entry_point_list:
+				if ent in self.static_entry_points:
+					self.static_entry_points.remove(ent)
+				self.entry_point_parameters.pop(ent)
+			else:
+				entry_points.append(ent)
+
+		if not entry_points:
+			raise RuntimeError('%s has no entry point after filtering' % self.name)
+
+		self.entry_points = entry_points
+		if self.name not in entry_points:
+			# use the first remaining entry point
+			self.name = entry_points[0]
+			self.parameters = self.entry_point_parameters[entry_points[0]]
 
 	def get_images(self):
 		"""Return potentially empty list of input images."""
@@ -721,11 +743,11 @@ class gl_function( gl_item ):
 
 	def get_parameter_string(self, entrypoint = None):
 		if entrypoint:
-			s = self.parameter_strings[ entrypoint ]
-			if s:
-				return s
+			params = self.entry_point_parameters[ entrypoint ]
+		else:
+			params = self.parameters
 		
-		return create_parameter_string( self.parameters, 1 )
+		return create_parameter_string( params, 1 )
 
 	def get_called_parameter_string(self):
 		p_string = ""
@@ -791,6 +813,16 @@ class gl_api:
 		typeexpr.create_initial_types()
 		return
 
+	def filter_functions(self, entry_point_list):
+		"""Filter out entry points not in entry_point_list."""
+		functions_by_name = {}
+		for func in self.functions_by_name.itervalues():
+			entry_points = [ent for ent in func.entry_points if ent in entry_point_list]
+			if entry_points:
+				func.filter_entry_points(entry_points)
+				functions_by_name[func.name] = func
+
+		self.functions_by_name = functions_by_name
 
 	def process_element(self, doc):
 		element = doc.children

From 5076561b35b9c2c78f277ab03bf1e642094ee20e Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Mon, 8 Aug 2011 10:14:44 +0900
Subject: [PATCH 287/600] glapi: use gl_and_es_API.xml to generate GLES headers

glapi/gen-es/ defines two sets of GLAPI XMLs for OpenGL ES 1.1
(es1_API.xml) and 2.0 (es2_API.xml) respectively.  They are used to
generate dispatch.h and remap_helper.h for GLES.  Together with
gl_and_es_API.xml, we have to maintain three sets of GLAPI XMLs.

This commit makes dispatch.h and remap_helper.h for GLES be generated
from gl_and_es_API.xml.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mapi/glapi/gen-es/Makefile     | 10 ++++++----
 src/mapi/glapi/gen/Makefile        |  6 ++----
 src/mapi/glapi/gen/gl_table.py     | 20 +++++++++++++++-----
 src/mapi/glapi/gen/remap_helper.py | 18 ++++++++++++++++--
 src/mesa/SConscript                | 16 ++++++++--------
 5 files changed, 47 insertions(+), 23 deletions(-)

diff --git a/src/mapi/glapi/gen-es/Makefile b/src/mapi/glapi/gen-es/Makefile
index bf66ec037cf..3fd539d26d5 100644
--- a/src/mapi/glapi/gen-es/Makefile
+++ b/src/mapi/glapi/gen-es/Makefile
@@ -11,8 +11,8 @@ OUTPUTS :=			\
 COMMON = gl_and_es_API.xml gl_XML.py glX_XML.py license.py typeexpr.py
 COMMON := $(addprefix $(GLAPI)/, $(COMMON))
 
-ES1_APIXML := es1_API.xml
-ES2_APIXML := es2_API.xml
+ES1_APIXML := $(GLAPI)/gl_and_es_API.xml
+ES2_APIXML := $(GLAPI)/gl_and_es_API.xml
 ES1_OUTPUT_DIR := $(TOP)/src/mapi/es1api
 ES2_OUTPUT_DIR := $(TOP)/src/mapi/es2api
 
@@ -37,10 +37,12 @@ shared-glapi: $(SHARED_GLAPI_OUTPUTS)
 
 $(ES1_OUTPUTS): APIXML := $(ES1_APIXML)
 $(ES1_OUTPUTS): PRINTER := es1api
+$(ES1_OUTPUTS): GLES_VER := es1
 $(ES1_OUTPUTS): $(ES1_DEPS)
 
 $(ES2_OUTPUTS): APIXML := $(ES2_APIXML)
 $(ES2_OUTPUTS): PRINTER := es2api
+$(ES2_OUTPUTS): GLES_VER := es2
 $(ES2_OUTPUTS): $(ES2_DEPS)
 
 $(SHARED_GLAPI_OUTPUTS): APIXML := $(SHARED_GLAPI_APIXML)
@@ -49,7 +51,7 @@ $(SHARED_GLAPI_OUTPUTS): $(SHARED_GLAPI_DEPS)
 
 define gen-glapi
 	@mkdir -p $(dir $@)
-	$(PYTHON2) $(PYTHON_FLAGS) $< -f $(APIXML) $(1) > $@
+	$(PYTHON2) $(PYTHON_FLAGS) $< -f $(APIXML) -c $(GLES_VER) $(1) > $@
 endef
 
 %/glapi_mapi_tmp.h: $(MAPI)/mapi_abi.py $(COMMON)
@@ -58,7 +60,7 @@ endef
 		--printer $(PRINTER) --mode lib $(GLAPI)/gl_and_es_API.xml > $@
 
 %/main/dispatch.h: $(GLAPI)/gl_table.py $(COMMON)
-	$(call gen-glapi,-c -m remap_table)
+	$(call gen-glapi,-m remap_table)
 
 %/main/remap_helper.h: $(GLAPI)/remap_helper.py $(COMMON)
 	$(call gen-glapi)
diff --git a/src/mapi/glapi/gen/Makefile b/src/mapi/glapi/gen/Makefile
index 3e101f3a10f..c386b8766c4 100644
--- a/src/mapi/glapi/gen/Makefile
+++ b/src/mapi/glapi/gen/Makefile
@@ -180,10 +180,8 @@ $(MESA_GLAPI_DIR)/glapi_sparc.S: gl_SPARC_asm.py $(COMMON)
 
 ######################################################################
 
-$(MESA_DIR)/main/enums.c: gl_enums.py $(COMMON) $(ES_API)
-	$(PYTHON2) $(PYTHON_FLAGS) $< -f gl_API.xml \
-		-f $(MESA_GLAPI_DIR)/gen-es/es1_API.xml \
-		-f $(MESA_GLAPI_DIR)/gen-es/es2_API.xml > $@
+$(MESA_DIR)/main/enums.c: gl_enums.py $(COMMON_ES)
+	$(PYTHON2) $(PYTHON_FLAGS) $< -f gl_and_es_API.xml > $@
 
 $(MESA_DIR)/main/dispatch.h: gl_table.py $(COMMON)
 	$(PYTHON2) $(PYTHON_FLAGS) $< -m remap_table > $@
diff --git a/src/mapi/glapi/gen/gl_table.py b/src/mapi/glapi/gen/gl_table.py
index 05979e3813f..2cbbd971a86 100644
--- a/src/mapi/glapi/gen/gl_table.py
+++ b/src/mapi/glapi/gen/gl_table.py
@@ -211,28 +211,28 @@ class PrintRemapTable(gl_XML.gl_print_base):
 
 
 def show_usage():
-	print "Usage: %s [-f input_file_name] [-m mode] [-c]" % sys.argv[0]
+	print "Usage: %s [-f input_file_name] [-m mode] [-c ver]" % sys.argv[0]
 	print "    -m mode   Mode can be 'table' or 'remap_table'."
-	print "    -c        Enable compatibility with OpenGL ES."
+	print "    -c ver    Version can be 'es1' or 'es2'."
 	sys.exit(1)
 
 if __name__ == '__main__':
 	file_name = "gl_API.xml"
     
 	try:
-		(args, trail) = getopt.getopt(sys.argv[1:], "f:m:c")
+		(args, trail) = getopt.getopt(sys.argv[1:], "f:m:c:")
 	except Exception,e:
 		show_usage()
 
 	mode = "table"
-	es = False
+	es = None
 	for (arg,val) in args:
 		if arg == "-f":
 			file_name = val
 		elif arg == "-m":
 			mode = val
 		elif arg == "-c":
-			es = True
+			es = val
 
 	if mode == "table":
 		printer = PrintGlTable(es)
@@ -243,4 +243,14 @@ if __name__ == '__main__':
 
 	api = gl_XML.parse_GL_API( file_name )
 
+	if es is not None:
+		import gles_api
+
+		api_map = {
+			'es1': gles_api.es1_api,
+			'es2': gles_api.es2_api,
+		}
+
+		api.filter_functions(api_map[es])
+
 	printer.Print( api )
diff --git a/src/mapi/glapi/gen/remap_helper.py b/src/mapi/glapi/gen/remap_helper.py
index 69b8e5e9d02..367ae24c75c 100644
--- a/src/mapi/glapi/gen/remap_helper.py
+++ b/src/mapi/glapi/gen/remap_helper.py
@@ -197,22 +197,36 @@ class PrintGlRemap(gl_XML.gl_print_base):
 
 
 def show_usage():
-	print "Usage: %s [-f input_file_name]" % sys.argv[0]
+	print "Usage: %s [-f input_file_name] [-c ver]" % sys.argv[0]
+	print "    -c ver    Version can be 'es1' or 'es2'."
 	sys.exit(1)
 
 if __name__ == '__main__':
 	file_name = "gl_API.xml"
 
 	try:
-		(args, trail) = getopt.getopt(sys.argv[1:], "f:")
+		(args, trail) = getopt.getopt(sys.argv[1:], "f:c:")
 	except Exception,e:
 		show_usage()
 
+	es = None
 	for (arg,val) in args:
 		if arg == "-f":
 			file_name = val
+		elif arg == "-c":
+			es = val
 
 	api = gl_XML.parse_GL_API( file_name )
 
+	if es is not None:
+		import gles_api
+
+		api_map = {
+			'es1': gles_api.es1_api,
+			'es2': gles_api.es2_api,
+		}
+
+		api.filter_functions(api_map[es])
+
 	printer = PrintGlRemap()
 	printer.Print( api )
diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index cbd16625186..ac85a3eeb05 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -349,26 +349,26 @@ if env['gles']:
     gles_headers += env.CodeGenerate(
         target = 'es1api/main/dispatch.h',
         script = GLAPI + 'gen/gl_table.py',
-        source = GLAPI + 'gen-es/es1_API.xml',
-        command = python_cmd + ' $SCRIPT -c -m remap_table -f $SOURCE > $TARGET',
+        source = GLAPI + 'gen/gl_and_es_API.xml',
+        command = python_cmd + ' $SCRIPT -c es1 -m remap_table -f $SOURCE > $TARGET',
     )
     gles_headers += env.CodeGenerate(
         target = 'es1api/main/remap_helper.h',
         script = GLAPI + 'gen/remap_helper.py',
-        source = GLAPI + 'gen-es/es1_API.xml',
-        command = python_cmd + ' $SCRIPT -f $SOURCE > $TARGET',
+        source = GLAPI + 'gen/gl_and_es_API.xml',
+        command = python_cmd + ' $SCRIPT -c es1 -f $SOURCE > $TARGET',
     )
     gles_headers += env.CodeGenerate(
         target = 'es2api/main/dispatch.h',
         script = GLAPI + 'gen/gl_table.py',
-        source = GLAPI + 'gen-es/es2_API.xml',
-        command = python_cmd + ' $SCRIPT -c -m remap_table -f $SOURCE > $TARGET',
+        source = GLAPI + 'gen/gl_and_es_API.xml',
+        command = python_cmd + ' $SCRIPT -c es2 -m remap_table -f $SOURCE > $TARGET',
     )
     gles_headers += env.CodeGenerate(
         target = 'es2api/main/remap_helper.h',
         script = GLAPI + 'gen/remap_helper.py',
-        source = GLAPI + 'gen-es/es2_API.xml',
-        command = python_cmd + ' $SCRIPT -f $SOURCE > $TARGET',
+        source = GLAPI + 'gen/gl_and_es_API.xml',
+        command = python_cmd + ' $SCRIPT -c es2 -f $SOURCE > $TARGET',
     )
 
     env.Depends(gles_sources, gles_headers)

From 786e5a2fb47e39c67a342eb22d84c665d058ee10 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Mon, 8 Aug 2011 10:40:12 +0900
Subject: [PATCH 288/600] glapi: add glapi_gen.mk to help header generation

glapi_gen.mk is supposed to be included by glapi users to simplify
header generation.  This commit also makes es1api, es2api, and
shared-glapi use it.

Reviewed-by: Brian Paul <brianp@vmware.com>

[olv: updated after reviewing to prefix all variables in glapi_gen.mk by
 glapi_gen]
---
 src/mapi/es1api/Makefile        | 25 ++++++++++---------
 src/mapi/glapi/gen/glapi_gen.mk | 44 +++++++++++++++++++++++++++++++++
 src/mapi/shared-glapi/Makefile  |  9 +++----
 3 files changed, 61 insertions(+), 17 deletions(-)
 create mode 100644 src/mapi/glapi/gen/glapi_gen.mk

diff --git a/src/mapi/es1api/Makefile b/src/mapi/es1api/Makefile
index aef694866c2..da5aa45806c 100644
--- a/src/mapi/es1api/Makefile
+++ b/src/mapi/es1api/Makefile
@@ -34,8 +34,6 @@ ESAPI = $(ES)api
 
 GLAPI := $(TOP)/src/mapi/glapi
 MAPI := $(TOP)/src/mapi/mapi
-# directory for generated sources/headers
-GEN := glapi
 
 esapi_CPPFLAGS := \
 	-I$(TOP)/include \
@@ -50,7 +48,7 @@ esapi_CPPFLAGS += -DMAPI_MODE_BRIDGE
 esapi_LIB_DEPS := -L$(TOP)/$(LIB_DIR) -l$(GLAPI_LIB) $(esapi_LIB_DEPS)
 
 .PHONY: default
-default: depend $(TOP)/$(LIB_DIR)/$(esapi_LIB_NAME)
+default: depend $(TOP)/$(LIB_DIR)/$(esapi_LIB_NAME) main/dispatch.h main/remap_helper.h
 
 $(TOP)/$(LIB_DIR)/$(esapi_LIB_NAME): $(esapi_OBJECTS)
 	$(MKLIB) -o $(esapi_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
@@ -68,13 +66,17 @@ lib$(ESAPI).a: $(esapi_OBJECTS)
 $(esapi_OBJECTS): %.o: $(MAPI)/%.c
 	$(CC) -c $(esapi_CPPFLAGS) $(CFLAGS) $< -o $@
 
-$(esapi_SOURCES): | glapi-stamp
+$(esapi_SOURCES): glapi_mapi_tmp.h
 
-.PHONY: glapi-stamp
-glapi-stamp:
-	@# generate sources/headers
-	@$(MAKE) -C $(GLAPI)/gen-es $(ES)
-	@touch $@
+include $(GLAPI)/gen/glapi_gen.mk
+glapi_mapi_tmp.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
+	$(call glapi_gen_mapi,$<,$(ESAPI))
+
+main/dispatch.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_dispatch_deps)
+	$(call glapi_gen_dispatch,$<,$(ES))
+
+main/remap_helper.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_remap_deps)
+	$(call glapi_gen_remap,$<,$(ES))
 
 .PHONY: clean
 clean:
@@ -83,9 +85,8 @@ clean:
 	-rm -f lib$(ESAPI).a
 	-rm -f $(esapi_OBJECTS)
 	-rm -f depend depend.bak
-	-rm -f glapi-stamp
-	@# clean generated sources/headers
-	@$(MAKE) -C $(GLAPI)/gen-es clean-$(ES)
+	-rm -f glapi_mapi_tmp.h
+	-rm -rf main
 
 pcedit = \
 	-e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \
diff --git a/src/mapi/glapi/gen/glapi_gen.mk b/src/mapi/glapi/gen/glapi_gen.mk
new file mode 100644
index 00000000000..c7fa7c0153d
--- /dev/null
+++ b/src/mapi/glapi/gen/glapi_gen.mk
@@ -0,0 +1,44 @@
+# Helpers for glapi header generation
+
+ifndef TOP
+$(error TOP must be defined.)
+endif
+
+glapi_gen_common_deps := \
+	$(wildcard $(TOP)/src/mapi/glapi/gen/*.xml) \
+	$(wildcard $(TOP)/src/mapi/glapi/gen/*.py)
+
+glapi_gen_mapi_script := $(TOP)/src/mapi/mapi/mapi_abi.py
+glapi_gen_mapi_deps := \
+	$(glapi_gen_mapi_script) \
+	$(glapi_gen_common_deps)
+
+# $(1): path to an XML file
+# $(2): name of the printer
+define glapi_gen_mapi
+@mkdir -p $(dir $@)
+$(PYTHON2) $(PYTHON_FLAGS) $(glapi_gen_mapi_script) \
+	--mode lib --printer $(2) $(1) > $@
+endef
+
+glapi_gen_dispatch_script := $(TOP)/src/mapi/glapi/gen/gl_table.py
+glapi_gen_dispatch_deps := $(glapi_gen_common_deps)
+
+# $(1): path to an XML file
+# $(2): empty, es1, or es2 for entry point filtering
+define glapi_gen_dispatch
+@mkdir -p $(dir $@)
+$(PYTHON2) $(PYTHON_FLAGS) $(glapi_gen_dispatch_script) \
+	-f $(1) -m remap_table $(if $(2),-c $(2),) > $@
+endef
+
+glapi_gen_remap_script := $(TOP)/src/mapi/glapi/gen/remap_helper.py
+glapi_gen_remap_deps := $(glapi_gen_common_deps)
+
+# $(1): path to an XML file
+# $(2): empty, es1, or es2 for entry point filtering
+define glapi_gen_remap
+@mkdir -p $(dir $@)
+$(PYTHON2) $(PYTHON_FLAGS) $(glapi_gen_remap_script) \
+	-f $(1) $(if $(2),-c $(2),) > $@
+endef
diff --git a/src/mapi/shared-glapi/Makefile b/src/mapi/shared-glapi/Makefile
index c928f822c81..3de864d891c 100644
--- a/src/mapi/shared-glapi/Makefile
+++ b/src/mapi/shared-glapi/Makefile
@@ -34,17 +34,16 @@ $(glapi_OBJECTS): %.o: $(MAPI)/%.c
 
 $(glapi_SOURCES): glapi_mapi_tmp.h
 
-.PHONY: glapi_mapi_tmp.h
-glapi_mapi_tmp.h:
-	@$(MAKE) -C $(GLAPI)/gen-es shared-glapi
+include $(GLAPI)/gen/glapi_gen.mk
+glapi_mapi_tmp.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
+	$(call glapi_gen_mapi,$<,shared-glapi)
 
 .PHONY: clean
 clean:
 	-rm -f $(TOP)/$(LIB_DIR)/$(GLAPI_LIB_NAME)
 	-rm -f $(glapi_OBJECTS)
 	-rm -f depend depend.bak
-	@# clean generated sources/headers
-	@$(MAKE) -C $(GLAPI)/gen-es clean-shared-glapi
+	-rm -f glapi_mapi_tmp.h
 
 install:
 	$(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)

From 6eff33dc7f2cd6e1430bd8dcaef4a7eb9fe3f6ee Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Thu, 11 Aug 2011 16:41:09 +0800
Subject: [PATCH 289/600] glapi: generate ES dispatch headers from core mesa

GLESv1 and GLESv2 have their own dispatch.h and remap_helper.h.  These
headers are only used by api_exec_es1.c and api_exec_es2.c in core mesa.
Move the rules to generate them from glapi to core mesa.

Reviewed-by: Brian Paul <brianp@vmware.com>

[olv: updated after reviewing to fix SCons build]
---
 src/mapi/es1api/Makefile      |  9 +--------
 src/mesa/Makefile             | 20 ++++++++++++++++++++
 src/mesa/SConscript           |  8 ++++----
 src/mesa/main/es_generator.py |  4 ++--
 4 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/src/mapi/es1api/Makefile b/src/mapi/es1api/Makefile
index da5aa45806c..0a0449b10a3 100644
--- a/src/mapi/es1api/Makefile
+++ b/src/mapi/es1api/Makefile
@@ -48,7 +48,7 @@ esapi_CPPFLAGS += -DMAPI_MODE_BRIDGE
 esapi_LIB_DEPS := -L$(TOP)/$(LIB_DIR) -l$(GLAPI_LIB) $(esapi_LIB_DEPS)
 
 .PHONY: default
-default: depend $(TOP)/$(LIB_DIR)/$(esapi_LIB_NAME) main/dispatch.h main/remap_helper.h
+default: depend $(TOP)/$(LIB_DIR)/$(esapi_LIB_NAME)
 
 $(TOP)/$(LIB_DIR)/$(esapi_LIB_NAME): $(esapi_OBJECTS)
 	$(MKLIB) -o $(esapi_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
@@ -72,12 +72,6 @@ include $(GLAPI)/gen/glapi_gen.mk
 glapi_mapi_tmp.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
 	$(call glapi_gen_mapi,$<,$(ESAPI))
 
-main/dispatch.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_dispatch_deps)
-	$(call glapi_gen_dispatch,$<,$(ES))
-
-main/remap_helper.h: $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_remap_deps)
-	$(call glapi_gen_remap,$<,$(ES))
-
 .PHONY: clean
 clean:
 	-rm -f $(esapi_PC)
@@ -86,7 +80,6 @@ clean:
 	-rm -f $(esapi_OBJECTS)
 	-rm -f depend depend.bak
 	-rm -f glapi_mapi_tmp.h
-	-rm -rf main
 
 pcedit = \
 	-e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \
diff --git a/src/mesa/Makefile b/src/mesa/Makefile
index 88f31b68695..0e15d61bd8d 100644
--- a/src/mesa/Makefile
+++ b/src/mesa/Makefile
@@ -67,6 +67,26 @@ $(DRICORE_OBJ_DIR)/%.o: %.S
 # then convenience libs (.a) and finally the device drivers:
 default: $(DEPENDS) asm_subdirs $(MESA_LIBS) $(DRICORE_LIBS) driver_subdirs
 
+# include glapi_gen.mk for generating glapi headers for GLES
+GLAPI := $(TOP)/src/mapi/glapi/gen
+include $(GLAPI)/glapi_gen.mk
+
+main/api_exec_es1_dispatch.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_dispatch_deps)
+	$(call glapi_gen_dispatch,$<,es1)
+
+main/api_exec_es1_remap_helper.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_remap_deps)
+	$(call glapi_gen_remap,$<,es1)
+
+main/api_exec_es1.o: main/api_exec_es1_dispatch.h main/api_exec_es1_remap_helper.h 
+
+main/api_exec_es2_dispatch.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_dispatch_deps)
+	$(call glapi_gen_dispatch,$<,es2)
+
+main/api_exec_es2_remap_helper.h: $(GLAPI)/gl_and_es_API.xml $(glapi_gen_remap_deps)
+	$(call glapi_gen_remap,$<,es2)
+
+main/api_exec_es2.o: main/api_exec_es2_dispatch.h main/api_exec_es2_remap_helper.h 
+
 main/api_exec_es1.c: main/APIspec.xml main/es_generator.py main/APIspecutil.py main/APIspec.py
 	$(PYTHON2) $(PYTHON_FLAGS) main/es_generator.py -S main/APIspec.xml -V GLES1.1 > $@
 
diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index ac85a3eeb05..05aa0e8010e 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -347,25 +347,25 @@ if env['gles']:
     GLAPI = '#src/mapi/glapi/'
     gles_headers = []
     gles_headers += env.CodeGenerate(
-        target = 'es1api/main/dispatch.h',
+        target = 'main/api_exec_es1_dispatch.h',
         script = GLAPI + 'gen/gl_table.py',
         source = GLAPI + 'gen/gl_and_es_API.xml',
         command = python_cmd + ' $SCRIPT -c es1 -m remap_table -f $SOURCE > $TARGET',
     )
     gles_headers += env.CodeGenerate(
-        target = 'es1api/main/remap_helper.h',
+        target = 'main/api_exec_es1_remap_helper.h',
         script = GLAPI + 'gen/remap_helper.py',
         source = GLAPI + 'gen/gl_and_es_API.xml',
         command = python_cmd + ' $SCRIPT -c es1 -f $SOURCE > $TARGET',
     )
     gles_headers += env.CodeGenerate(
-        target = 'es2api/main/dispatch.h',
+        target = 'main/api_exec_es2_dispatch.h',
         script = GLAPI + 'gen/gl_table.py',
         source = GLAPI + 'gen/gl_and_es_API.xml',
         command = python_cmd + ' $SCRIPT -c es2 -m remap_table -f $SOURCE > $TARGET',
     )
     gles_headers += env.CodeGenerate(
-        target = 'es2api/main/remap_helper.h',
+        target = 'main/api_exec_es2_remap_helper.h',
         script = GLAPI + 'gen/remap_helper.py',
         source = GLAPI + 'gen/gl_and_es_API.xml',
         command = python_cmd + ' $SCRIPT -c es2 -f $SOURCE > $TARGET',
diff --git a/src/mesa/main/es_generator.py b/src/mesa/main/es_generator.py
index c0b0a445806..cad3deaef94 100644
--- a/src/mesa/main/es_generator.py
+++ b/src/mesa/main/es_generator.py
@@ -681,10 +681,10 @@ print """
 #if FEATURE_remap_table
 
 /* define esLocalRemapTable */
-#include "%sapi/main/dispatch.h"
+#include "main/api_exec_%s_dispatch.h"
 
 #define need_MESA_remap_table
-#include "%sapi/main/remap_helper.h"
+#include "main/api_exec_%s_remap_helper.h"
 
 static void
 init_remap_table(void)

From e09b706c9e7d9308ad787e857a9456334426e5fd Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Sun, 7 Aug 2011 23:57:51 +0900
Subject: [PATCH 290/600] glapi: remove gen-es

Not used anymore.
---
 src/mapi/glapi/gen-es/Makefile           |   93 -
 src/mapi/glapi/gen-es/base1_API.xml      |  744 ------
 src/mapi/glapi/gen-es/base2_API.xml      |  533 -----
 src/mapi/glapi/gen-es/es1_API.xml        | 1100 ---------
 src/mapi/glapi/gen-es/es1_COMPAT.xml     |  135 --
 src/mapi/glapi/gen-es/es1_EXT.xml        |  699 ------
 src/mapi/glapi/gen-es/es2_API.xml        |  294 ---
 src/mapi/glapi/gen-es/es2_COMPAT.xml     |  368 ---
 src/mapi/glapi/gen-es/es2_EXT.xml        |  162 --
 src/mapi/glapi/gen-es/es_COMPAT.xml      | 2646 ----------------------
 src/mapi/glapi/gen-es/es_EXT.xml         |  125 -
 src/mapi/glapi/gen-es/gl_compare.py      |  354 ---
 src/mapi/glapi/gen-es/gl_parse_header.py |  450 ----
 13 files changed, 7703 deletions(-)
 delete mode 100644 src/mapi/glapi/gen-es/Makefile
 delete mode 100644 src/mapi/glapi/gen-es/base1_API.xml
 delete mode 100644 src/mapi/glapi/gen-es/base2_API.xml
 delete mode 100644 src/mapi/glapi/gen-es/es1_API.xml
 delete mode 100644 src/mapi/glapi/gen-es/es1_COMPAT.xml
 delete mode 100644 src/mapi/glapi/gen-es/es1_EXT.xml
 delete mode 100644 src/mapi/glapi/gen-es/es2_API.xml
 delete mode 100644 src/mapi/glapi/gen-es/es2_COMPAT.xml
 delete mode 100644 src/mapi/glapi/gen-es/es2_EXT.xml
 delete mode 100644 src/mapi/glapi/gen-es/es_COMPAT.xml
 delete mode 100644 src/mapi/glapi/gen-es/es_EXT.xml
 delete mode 100644 src/mapi/glapi/gen-es/gl_compare.py
 delete mode 100644 src/mapi/glapi/gen-es/gl_parse_header.py

diff --git a/src/mapi/glapi/gen-es/Makefile b/src/mapi/glapi/gen-es/Makefile
deleted file mode 100644
index 3fd539d26d5..00000000000
--- a/src/mapi/glapi/gen-es/Makefile
+++ /dev/null
@@ -1,93 +0,0 @@
-TOP = ../../../..
-MAPI = $(TOP)/src/mapi/mapi
-GLAPI = ../gen
-include $(TOP)/configs/current
-
-OUTPUTS :=			\
-	glapi_mapi_tmp.h	\
-	main/dispatch.h		\
-	main/remap_helper.h
-
-COMMON = gl_and_es_API.xml gl_XML.py glX_XML.py license.py typeexpr.py
-COMMON := $(addprefix $(GLAPI)/, $(COMMON))
-
-ES1_APIXML := $(GLAPI)/gl_and_es_API.xml
-ES2_APIXML := $(GLAPI)/gl_and_es_API.xml
-ES1_OUTPUT_DIR := $(TOP)/src/mapi/es1api
-ES2_OUTPUT_DIR := $(TOP)/src/mapi/es2api
-
-ES1_DEPS = $(ES1_APIXML) base1_API.xml es1_EXT.xml es_EXT.xml \
-	   es1_COMPAT.xml es_COMPAT.xml
-ES2_DEPS = $(ES2_APIXML) base2_API.xml es2_EXT.xml es_EXT.xml \
-	   es2_COMPAT.xml es_COMPAT.xml
-
-ES1_OUTPUTS := $(addprefix $(ES1_OUTPUT_DIR)/, $(OUTPUTS))
-ES2_OUTPUTS := $(addprefix $(ES2_OUTPUT_DIR)/, $(OUTPUTS))
-
-SHARED_GLAPI_APIXML := $(GLAPI)/gl_and_es_API.xml
-SHARED_GLAPI_OUTPUT_DIR := $(TOP)/src/mapi/shared-glapi
-SHARED_GLAPI_DEPS := $(SHARED_GLAPI_APIXML)
-SHARED_GLAPI_OUTPUTS = $(SHARED_GLAPI_OUTPUT_DIR)/glapi_mapi_tmp.h
-
-all: es1 es2 shared-glapi
-
-es1: $(ES1_OUTPUTS)
-es2: $(ES2_OUTPUTS)
-shared-glapi: $(SHARED_GLAPI_OUTPUTS)
-
-$(ES1_OUTPUTS): APIXML := $(ES1_APIXML)
-$(ES1_OUTPUTS): PRINTER := es1api
-$(ES1_OUTPUTS): GLES_VER := es1
-$(ES1_OUTPUTS): $(ES1_DEPS)
-
-$(ES2_OUTPUTS): APIXML := $(ES2_APIXML)
-$(ES2_OUTPUTS): PRINTER := es2api
-$(ES2_OUTPUTS): GLES_VER := es2
-$(ES2_OUTPUTS): $(ES2_DEPS)
-
-$(SHARED_GLAPI_OUTPUTS): APIXML := $(SHARED_GLAPI_APIXML)
-$(SHARED_GLAPI_OUTPUTS): PRINTER := shared-glapi
-$(SHARED_GLAPI_OUTPUTS): $(SHARED_GLAPI_DEPS)
-
-define gen-glapi
-	@mkdir -p $(dir $@)
-	$(PYTHON2) $(PYTHON_FLAGS) $< -f $(APIXML) -c $(GLES_VER) $(1) > $@
-endef
-
-%/glapi_mapi_tmp.h: $(MAPI)/mapi_abi.py $(COMMON)
-	@mkdir -p $(dir $@)
-	$(PYTHON2) $(PYTHON_FLAGS) $< \
-		--printer $(PRINTER) --mode lib $(GLAPI)/gl_and_es_API.xml > $@
-
-%/main/dispatch.h: $(GLAPI)/gl_table.py $(COMMON)
-	$(call gen-glapi,-m remap_table)
-
-%/main/remap_helper.h: $(GLAPI)/remap_helper.py $(COMMON)
-	$(call gen-glapi)
-
-verify_xml:
-	@if [ ! -f gl.h ]; then \
-		echo "Please copy gl.h and gl2.h to this directory"; \
-		exit 1; \
-	fi
-	@echo "Verifying that es1_API.xml covers OpenGL ES 1.1..."
-	@$(PYTHON2) $(PYTHON_FLAGS) gl_parse_header.py gl.h > tmp.xml
-	@$(PYTHON2) $(PYTHON_FLAGS) gl_compare.py difference tmp.xml es1_API.xml
-	@echo "Verifying that es2_API.xml covers OpenGL ES 2.0..."
-	@$(PYTHON2) $(PYTHON_FLAGS) gl_parse_header.py gl2.h > tmp.xml
-	@$(PYTHON2) $(PYTHON_FLAGS) gl_compare.py difference tmp.xml es2_API.xml
-	@rm -f tmp.xml
-
-clean-es1:
-	-rm -f $(ES1_OUTPUTS)
-	-rm -rf $(ES1_OUTPUT_DIR)/main
-
-clean-es2:
-	-rm -f $(ES2_OUTPUTS)
-	-rm -rf $(ES2_OUTPUT_DIR)/main
-
-clean-shared-glapi:
-	-rm -f $(SHARED_GLAPI_OUTPUTS)
-
-clean: clean-es1 clean-es2 clean-shared-glapi
-	-rm -f *~ *.pyc *.pyo
diff --git a/src/mapi/glapi/gen-es/base1_API.xml b/src/mapi/glapi/gen-es/base1_API.xml
deleted file mode 100644
index 720be257ca2..00000000000
--- a/src/mapi/glapi/gen-es/base1_API.xml
+++ /dev/null
@@ -1,744 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd">
-
-<!-- OpenGL and OpenGL ES 1.x APIs
-     This file defines the base categories that can be shared by all APIs.
-     They are defined in an incremental fashion.
--->
-
-<OpenGLAPI>
-
-<!-- base subset of OpenGL 1.0 -->
-<category name="base1.0">
-    <enum name="FALSE"                                    value="0x0"/>
-    <enum name="TRUE"                                     value="0x1"/>
-    <enum name="ZERO"                                     value="0x0"/>
-    <enum name="ONE"                                      value="0x1"/>
-    <enum name="NO_ERROR"                                 value="0x0"/>
-
-    <enum name="POINTS"                                   value="0x0000"/>
-    <enum name="LINES"                                    value="0x0001"/>
-    <enum name="LINE_LOOP"                                value="0x0002"/>
-    <enum name="LINE_STRIP"                               value="0x0003"/>
-    <enum name="TRIANGLES"                                value="0x0004"/>
-    <enum name="TRIANGLE_STRIP"                           value="0x0005"/>
-    <enum name="TRIANGLE_FAN"                             value="0x0006"/>
-    <enum name="NEVER"                                    value="0x0200"/>
-    <enum name="LESS"                                     value="0x0201"/>
-    <enum name="EQUAL"                                    value="0x0202"/>
-    <enum name="LEQUAL"                                   value="0x0203"/>
-    <enum name="GREATER"                                  value="0x0204"/>
-    <enum name="NOTEQUAL"                                 value="0x0205"/>
-    <enum name="GEQUAL"                                   value="0x0206"/>
-    <enum name="ALWAYS"                                   value="0x0207"/>
-    <enum name="SRC_COLOR"                                value="0x0300"/>
-    <enum name="ONE_MINUS_SRC_COLOR"                      value="0x0301"/>
-    <enum name="SRC_ALPHA"                                value="0x0302"/>
-    <enum name="ONE_MINUS_SRC_ALPHA"                      value="0x0303"/>
-    <enum name="DST_ALPHA"                                value="0x0304"/>
-    <enum name="ONE_MINUS_DST_ALPHA"                      value="0x0305"/>
-    <enum name="DST_COLOR"                                value="0x0306"/>
-    <enum name="ONE_MINUS_DST_COLOR"                      value="0x0307"/>
-    <enum name="SRC_ALPHA_SATURATE"                       value="0x0308"/>
-    <enum name="FRONT"                                    value="0x0404"/>
-    <enum name="BACK"                                     value="0x0405"/>
-    <enum name="FRONT_AND_BACK"                           value="0x0408"/>
-    <enum name="INVALID_ENUM"                             value="0x0500"/>
-    <enum name="INVALID_VALUE"                            value="0x0501"/>
-    <enum name="INVALID_OPERATION"                        value="0x0502"/>
-    <enum name="OUT_OF_MEMORY"                            value="0x0505"/>
-    <enum name="CW"                                       value="0x0900"/>
-    <enum name="CCW"                                      value="0x0901"/>
-    <enum name="CULL_FACE"                     count="1"  value="0x0B44">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="DEPTH_TEST"                    count="1"  value="0x0B71">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_TEST"                  count="1"  value="0x0B90">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="DITHER"                        count="1"  value="0x0BD0">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLEND"                         count="1"  value="0x0BE2">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="SCISSOR_TEST"                  count="1"  value="0x0C11">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="UNPACK_ALIGNMENT"              count="1"  value="0x0CF5">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="PACK_ALIGNMENT"                count="1"  value="0x0D05">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MAX_TEXTURE_SIZE"              count="1"  value="0x0D33">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MAX_VIEWPORT_DIMS"             count="2"  value="0x0D3A">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="SUBPIXEL_BITS"                 count="1"  value="0x0D50">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="RED_BITS"                      count="1"  value="0x0D52">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="GREEN_BITS"                    count="1"  value="0x0D53">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLUE_BITS"                     count="1"  value="0x0D54">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="ALPHA_BITS"                    count="1"  value="0x0D55">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="DEPTH_BITS"                    count="1"  value="0x0D56">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_BITS"                  count="1"  value="0x0D57">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_2D"                    count="1"  value="0x0DE1">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="DONT_CARE"                                value="0x1100"/>
-    <enum name="FASTEST"                                  value="0x1101"/>
-    <enum name="NICEST"                                   value="0x1102"/>
-    <enum name="BYTE"                          count="1"  value="0x1400">
-        <size name="CallLists"/>
-    </enum>
-    <enum name="UNSIGNED_BYTE"                 count="1"  value="0x1401">
-        <size name="CallLists"/>
-    </enum>
-    <enum name="SHORT"                         count="2"  value="0x1402">
-        <size name="CallLists"/>
-    </enum>
-    <enum name="UNSIGNED_SHORT"                count="2"  value="0x1403">
-        <size name="CallLists"/>
-    </enum>
-    <enum name="FLOAT"                         count="4"  value="0x1406">
-        <size name="CallLists"/>
-    </enum>
-    <enum name="INVERT"                                   value="0x150A"/>
-    <enum name="TEXTURE"                                  value="0x1702"/>
-    <enum name="ALPHA"                                    value="0x1906"/>
-    <enum name="RGB"                                      value="0x1907"/>
-    <enum name="RGBA"                                     value="0x1908"/>
-    <enum name="LUMINANCE"                                value="0x1909"/>
-    <enum name="LUMINANCE_ALPHA"                          value="0x190A"/>
-    <enum name="KEEP"                                     value="0x1E00"/>
-    <enum name="REPLACE"                                  value="0x1E01"/>
-    <enum name="INCR"                                     value="0x1E02"/>
-    <enum name="DECR"                                     value="0x1E03"/>
-    <enum name="VENDOR"                                   value="0x1F00"/>
-    <enum name="RENDERER"                                 value="0x1F01"/>
-    <enum name="VERSION"                                  value="0x1F02"/>
-    <enum name="EXTENSIONS"                               value="0x1F03"/>
-    <enum name="NEAREST"                                  value="0x2600"/>
-    <enum name="LINEAR"                                   value="0x2601"/>
-    <enum name="NEAREST_MIPMAP_NEAREST"                   value="0x2700"/>
-    <enum name="LINEAR_MIPMAP_NEAREST"                    value="0x2701"/>
-    <enum name="NEAREST_MIPMAP_LINEAR"                    value="0x2702"/>
-    <enum name="LINEAR_MIPMAP_LINEAR"                     value="0x2703"/>
-    <enum name="TEXTURE_MAG_FILTER"            count="1"  value="0x2800">
-        <size name="TexParameterfv"/>
-        <size name="TexParameteriv"/>
-        <size name="GetTexParameterfv" mode="get"/>
-        <size name="GetTexParameteriv" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_MIN_FILTER"            count="1"  value="0x2801">
-        <size name="TexParameterfv"/>
-        <size name="TexParameteriv"/>
-        <size name="GetTexParameterfv" mode="get"/>
-        <size name="GetTexParameteriv" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_WRAP_S"                count="1"  value="0x2802">
-        <size name="TexParameterfv"/>
-        <size name="TexParameteriv"/>
-        <size name="GetTexParameterfv" mode="get"/>
-        <size name="GetTexParameteriv" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_WRAP_T"                count="1"  value="0x2803">
-        <size name="TexParameterfv"/>
-        <size name="TexParameteriv"/>
-        <size name="GetTexParameterfv" mode="get"/>
-        <size name="GetTexParameteriv" mode="get"/>
-    </enum>
-    <enum name="REPEAT"                                   value="0x2901"/>
-
-    <enum name="DEPTH_BUFFER_BIT"                         value="0x00000100"/>
-    <enum name="STENCIL_BUFFER_BIT"                       value="0x00000400"/>
-    <enum name="COLOR_BUFFER_BIT"                         value="0x00004000"/>
-
-    <type name="float"   size="4"  float="true"    glx_name="FLOAT32"/>
-    <type name="clampf"  size="4"  float="true"    glx_name="FLOAT32"/>
-
-    <type name="int"     size="4"                  glx_name="CARD32"/>
-    <type name="uint"    size="4"  unsigned="true" glx_name="CARD32"/>
-    <type name="sizei"   size="4"                  glx_name="CARD32"/>
-    <type name="enum"    size="4"  unsigned="true" glx_name="ENUM"/>
-    <type name="bitfield" size="4" unsigned="true" glx_name="CARD32"/>
-
-    <type name="short"   size="2"                  glx_name="CARD16"/>
-    <type name="ushort"  size="2"  unsigned="true" glx_name="CARD16"/>
-
-    <type name="byte"    size="1"                  glx_name="CARD8"/>
-    <type name="ubyte"   size="1"  unsigned="true" glx_name="CARD8"/>
-    <type name="boolean" size="1"  unsigned="true" glx_name="CARD8"/>
-
-    <type name="void"    size="1"/>
-
-    <function name="BlendFunc" offset="241">
-        <param name="sfactor" type="GLenum"/>
-        <param name="dfactor" type="GLenum"/>
-        <glx rop="160"/>
-    </function>
-
-    <function name="Clear" offset="203">
-        <param name="mask" type="GLbitfield"/>
-        <glx rop="127"/>
-    </function>
-
-    <function name="ClearColor" offset="206">
-        <param name="red" type="GLclampf"/>
-        <param name="green" type="GLclampf"/>
-        <param name="blue" type="GLclampf"/>
-        <param name="alpha" type="GLclampf"/>
-        <glx rop="130"/>
-    </function>
-
-    <function name="ClearStencil" offset="207">
-        <param name="s" type="GLint"/>
-        <glx rop="131"/>
-    </function>
-
-    <function name="ColorMask" offset="210">
-        <param name="red" type="GLboolean"/>
-        <param name="green" type="GLboolean"/>
-        <param name="blue" type="GLboolean"/>
-        <param name="alpha" type="GLboolean"/>
-        <glx rop="134"/>
-    </function>
-
-    <function name="CullFace" offset="152">
-        <param name="mode" type="GLenum"/>
-        <glx rop="79"/>
-    </function>
-
-    <function name="DepthFunc" offset="245">
-        <param name="func" type="GLenum"/>
-        <glx rop="164"/>
-    </function>
-
-    <function name="DepthMask" offset="211">
-        <param name="flag" type="GLboolean"/>
-        <glx rop="135"/>
-    </function>
-
-    <function name="Disable" offset="214">
-        <param name="cap" type="GLenum"/>
-        <glx rop="138" handcode="client"/>
-    </function>
-
-    <function name="Enable" offset="215">
-        <param name="cap" type="GLenum"/>
-        <glx rop="139" handcode="client"/>
-    </function>
-
-    <function name="Finish" offset="216">
-        <glx sop="108" handcode="true"/>
-    </function>
-
-    <function name="Flush" offset="217">
-        <glx sop="142" handcode="true"/>
-    </function>
-
-    <function name="FrontFace" offset="157">
-        <param name="mode" type="GLenum"/>
-        <glx rop="84"/>
-    </function>
-
-    <function name="GetError" offset="261">
-        <return type="GLenum"/>
-        <glx sop="115" handcode="client"/>
-    </function>
-
-    <function name="GetIntegerv" offset="263">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="117" handcode="client"/>
-    </function>
-
-    <function name="GetString" offset="275">
-        <param name="name" type="GLenum"/>
-        <return type="const GLubyte *"/>
-        <glx sop="129" handcode="true"/>
-    </function>
-
-    <function name="Hint" offset="158">
-        <param name="target" type="GLenum"/>
-        <param name="mode" type="GLenum"/>
-        <glx rop="85"/>
-    </function>
-
-    <function name="LineWidth" offset="168">
-        <param name="width" type="GLfloat"/>
-        <glx rop="95"/>
-    </function>
-
-    <function name="PixelStorei" offset="250">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx sop="110" handcode="client"/>
-    </function>
-
-    <function name="ReadPixels" offset="256">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="GLvoid *" output="true"  img_width="width" img_height="height" img_format="format" img_type="type" img_target="0"/>
-        <glx sop="111"/>
-    </function>
-
-    <function name="Scissor" offset="176">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <glx rop="103"/>
-    </function>
-
-    <function name="StencilFunc" offset="243">
-        <param name="func" type="GLenum"/>
-        <param name="ref" type="GLint"/>
-        <param name="mask" type="GLuint"/>
-        <glx rop="162"/>
-    </function>
-
-    <function name="StencilMask" offset="209">
-        <param name="mask" type="GLuint"/>
-        <glx rop="133"/>
-    </function>
-
-    <function name="StencilOp" offset="244">
-        <param name="fail" type="GLenum"/>
-        <param name="zfail" type="GLenum"/>
-        <param name="zpass" type="GLenum"/>
-        <glx rop="163"/>
-    </function>
-
-    <function name="TexParameterf" offset="178">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="105"/>
-    </function>
-
-    <function name="Viewport" offset="305">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <glx rop="191"/>
-    </function>
-
-    <!-- these are not in OpenGL ES 1.0 -->
-    <enum name="LINE_WIDTH"                    count="1"  value="0x0B21">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="CULL_FACE_MODE"                count="1"  value="0x0B45">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FRONT_FACE"                    count="1"  value="0x0B46">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="DEPTH_RANGE"                   count="2"  value="0x0B70">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="DEPTH_WRITEMASK"               count="1"  value="0x0B72">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="DEPTH_CLEAR_VALUE"             count="1"  value="0x0B73">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="DEPTH_FUNC"                    count="1"  value="0x0B74">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_CLEAR_VALUE"           count="1"  value="0x0B91">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_FUNC"                  count="1"  value="0x0B92">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_VALUE_MASK"            count="1"  value="0x0B93">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_FAIL"                  count="1"  value="0x0B94">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_PASS_DEPTH_FAIL"       count="1"  value="0x0B95">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_PASS_DEPTH_PASS"       count="1"  value="0x0B96">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_REF"                   count="1"  value="0x0B97">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_WRITEMASK"             count="1"  value="0x0B98">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="VIEWPORT"                      count="4"  value="0x0BA2">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="SCISSOR_BOX"                   count="4"  value="0x0C10">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="COLOR_CLEAR_VALUE"             count="4"  value="0x0C22">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="COLOR_WRITEMASK"               count="4"  value="0x0C23">
-        <size name="Get" mode="get"/>
-    </enum>
-
-    <function name="TexParameterfv" offset="179">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="106"/>
-    </function>
-
-    <function name="TexParameteri" offset="180">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="107"/>
-    </function>
-
-    <function name="TexParameteriv" offset="181">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="108"/>
-    </function>
-
-    <function name="GetBooleanv" offset="258">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLboolean *" output="true" variable_param="pname"/>
-        <glx sop="112" handcode="client"/>
-    </function>
-
-    <function name="GetFloatv" offset="262">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="116" handcode="client"/>
-    </function>
-
-    <function name="GetTexParameterfv" offset="282">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="136"/>
-    </function>
-
-    <function name="GetTexParameteriv" offset="283">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="137"/>
-    </function>
-
-    <function name="IsEnabled" offset="286">
-        <param name="cap" type="GLenum"/>
-        <return type="GLboolean"/>
-        <glx sop="140" handcode="client"/>
-    </function>
-</category>
-
-<!-- base subset of OpenGL 1.1 -->
-<category name="base1.1">
-    <enum name="POLYGON_OFFSET_FILL"                      value="0x8037"/>
-
-    <function name="BindTexture" offset="307">
-        <param name="target" type="GLenum"/>
-        <param name="texture" type="GLuint"/>
-        <glx rop="4117"/>
-    </function>
-
-    <function name="CopyTexImage2D" offset="324">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <glx rop="4120"/>
-    </function>
-
-    <function name="CopyTexSubImage2D" offset="326">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <glx rop="4122"/>
-    </function>
-
-    <function name="DeleteTextures" offset="327">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="textures" type="const GLuint *" count="n"/>
-        <glx sop="144"/>
-    </function>
-
-    <function name="DrawArrays" offset="310">
-        <param name="mode" type="GLenum"/>
-        <param name="first" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <glx rop="193" handcode="true"/>
-    </function>
-
-    <function name="DrawElements" offset="311">
-        <param name="mode" type="GLenum"/>
-        <param name="count" type="GLsizei"/>
-        <param name="type" type="GLenum"/>
-        <param name="indices" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="GenTextures" offset="328">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="textures" type="GLuint *" output="true" count="n"/>
-        <glx sop="145" always_array="true"/>
-    </function>
-
-    <function name="PolygonOffset" offset="319">
-        <param name="factor" type="GLfloat"/>
-        <param name="units" type="GLfloat"/>
-        <glx rop="192"/>
-    </function>
-
-    <function name="TexSubImage2D" offset="333">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="UNUSED" type="GLuint" padding="true"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_xoff="xoffset" img_yoff="yoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/>
-        <glx rop="4100" large="true"/>
-    </function>
-
-    <!-- these are not in OpenGL ES 1.0 -->
-    <enum name="POLYGON_OFFSET_UNITS"          count="1"  value="0x2A00">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="POLYGON_OFFSET_FACTOR"         count="1"  value="0x8038">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_BINDING_2D"            count="1"  value="0x8069">
-        <size name="Get" mode="get"/>
-    </enum>
-
-    <function name="IsTexture" offset="330">
-        <param name="texture" type="GLuint"/>
-        <return type="GLboolean"/>
-        <glx sop="146"/>
-    </function>
-</category>
-
-<!-- base subset of OpenGL 1.2 -->
-<category name="base1.2">
-    <enum name="UNSIGNED_SHORT_4_4_4_4"                   value="0x8033"/>
-    <enum name="UNSIGNED_SHORT_5_5_5_1"                   value="0x8034"/>
-    <enum name="CLAMP_TO_EDGE"                            value="0x812F"/>
-    <enum name="UNSIGNED_SHORT_5_6_5"                     value="0x8363"/>
-    <enum name="ALIASED_POINT_SIZE_RANGE"      count="2"  value="0x846D">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="ALIASED_LINE_WIDTH_RANGE"      count="2"  value="0x846E">
-        <size name="Get" mode="get"/>
-    </enum>
-</category>
-
-<!-- base subset of OpenGL 1.3 -->
-<category name="base1.3">
-    <enum name="SAMPLE_ALPHA_TO_COVERAGE"      count="1"  value="0x809E">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="SAMPLE_COVERAGE"               count="1"  value="0x80A0">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE0"                                 value="0x84C0"/>
-    <enum name="TEXTURE1"                                 value="0x84C1"/>
-    <enum name="TEXTURE2"                                 value="0x84C2"/>
-    <enum name="TEXTURE3"                                 value="0x84C3"/>
-    <enum name="TEXTURE4"                                 value="0x84C4"/>
-    <enum name="TEXTURE5"                                 value="0x84C5"/>
-    <enum name="TEXTURE6"                                 value="0x84C6"/>
-    <enum name="TEXTURE7"                                 value="0x84C7"/>
-    <enum name="TEXTURE8"                                 value="0x84C8"/>
-    <enum name="TEXTURE9"                                 value="0x84C9"/>
-    <enum name="TEXTURE10"                                value="0x84CA"/>
-    <enum name="TEXTURE11"                                value="0x84CB"/>
-    <enum name="TEXTURE12"                                value="0x84CC"/>
-    <enum name="TEXTURE13"                                value="0x84CD"/>
-    <enum name="TEXTURE14"                                value="0x84CE"/>
-    <enum name="TEXTURE15"                                value="0x84CF"/>
-    <enum name="TEXTURE16"                                value="0x84D0"/>
-    <enum name="TEXTURE17"                                value="0x84D1"/>
-    <enum name="TEXTURE18"                                value="0x84D2"/>
-    <enum name="TEXTURE19"                                value="0x84D3"/>
-    <enum name="TEXTURE20"                                value="0x84D4"/>
-    <enum name="TEXTURE21"                                value="0x84D5"/>
-    <enum name="TEXTURE22"                                value="0x84D6"/>
-    <enum name="TEXTURE23"                                value="0x84D7"/>
-    <enum name="TEXTURE24"                                value="0x84D8"/>
-    <enum name="TEXTURE25"                                value="0x84D9"/>
-    <enum name="TEXTURE26"                                value="0x84DA"/>
-    <enum name="TEXTURE27"                                value="0x84DB"/>
-    <enum name="TEXTURE28"                                value="0x84DC"/>
-    <enum name="TEXTURE29"                                value="0x84DD"/>
-    <enum name="TEXTURE30"                                value="0x84DE"/>
-    <enum name="TEXTURE31"                                value="0x84DF"/>
-    <enum name="NUM_COMPRESSED_TEXTURE_FORMATS" count="1" value="0x86A2">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="COMPRESSED_TEXTURE_FORMATS"    count="-1"  value="0x86A3">
-        <size name="Get" mode="get"/>
-    </enum>
-
-    <function name="ActiveTexture" offset="374">
-        <param name="texture" type="GLenum"/>
-        <glx rop="197"/>
-    </function>
-
-    <function name="CompressedTexImage2D" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <param name="imageSize" type="GLsizei" counter="true"/>
-        <param name="data" type="const GLvoid *" count="imageSize"/>
-        <glx rop="215" handcode="client"/>
-    </function>
-
-    <function name="CompressedTexSubImage2D" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="imageSize" type="GLsizei" counter="true"/>
-        <param name="data" type="const GLvoid *" count="imageSize"/>
-        <glx rop="218" handcode="client"/>
-    </function>
-
-    <function name="SampleCoverage" offset="assign">
-        <param name="value" type="GLclampf"/>
-        <param name="invert" type="GLboolean"/>
-        <glx rop="229"/>
-    </function>
-
-    <!-- these are not in OpenGL ES 1.0 -->
-    <enum name="SAMPLE_BUFFERS"                count="1"  value="0x80A8">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="SAMPLES"                       count="1"  value="0x80A9">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="SAMPLE_COVERAGE_VALUE"         count="1"  value="0x80AA">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="SAMPLE_COVERAGE_INVERT"        count="1"  value="0x80AB">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="ACTIVE_TEXTURE"                count="1"  value="0x84E0">
-        <size name="Get" mode="get"/>
-    </enum>
-</category>
-
-<!-- base subset of OpenGL 1.4 -->
-<category name="base1.4">
-    <enum name="GENERATE_MIPMAP_HINT"                     value="0x8192"/>
-</category>
-
-<!-- base subset of OpenGL 1.5 -->
-<category name="base1.5">
-    <enum name="BUFFER_SIZE"                              value="0x8764"/>
-    <enum name="BUFFER_USAGE"                             value="0x8765"/>
-    <enum name="ARRAY_BUFFER"                             value="0x8892"/>
-    <enum name="ELEMENT_ARRAY_BUFFER"                     value="0x8893"/>
-    <enum name="ARRAY_BUFFER_BINDING"                     value="0x8894"/>
-    <enum name="ELEMENT_ARRAY_BUFFER_BINDING"             value="0x8895"/>
-    <enum name="STATIC_DRAW"                              value="0x88E4"/>
-    <enum name="DYNAMIC_DRAW"                             value="0x88E8"/>
-
-    <type name="intptr"   size="4"                  glx_name="CARD32"/>
-    <type name="sizeiptr" size="4"                  glx_name="CARD32"/>
-
-    <function name="BindBuffer" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="buffer" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="BufferData" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="size" type="GLsizeiptr" counter="true"/>
-        <param name="data" type="const GLvoid *" count="size" img_null_flag="true"/>
-        <param name="usage" type="GLenum"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="BufferSubData" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="offset" type="GLintptr"/>
-        <param name="size" type="GLsizeiptr" counter="true"/>
-        <param name="data" type="const GLvoid *" count="size"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="DeleteBuffers" offset="assign">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="buffer" type="const GLuint *" count="n"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GenBuffers" offset="assign">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="buffer" type="GLuint *" output="true" count="n"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetBufferParameteriv" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="IsBuffer" offset="assign">
-        <param name="buffer" type="GLuint"/>
-        <return type="GLboolean"/>
-        <glx ignore="true"/>
-    </function>
-</category>
-
-</OpenGLAPI>
diff --git a/src/mapi/glapi/gen-es/base2_API.xml b/src/mapi/glapi/gen-es/base2_API.xml
deleted file mode 100644
index b59ef62de64..00000000000
--- a/src/mapi/glapi/gen-es/base2_API.xml
+++ /dev/null
@@ -1,533 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd">
-
-<!-- OpenGL and OpenGL ES 2.x APIs -->
-
-<OpenGLAPI>
-
-<xi:include href="base1_API.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-
-<!-- base subset of OpenGL 2.0 -->
-<category name="base2.0">
-    <enum name="BLEND_EQUATION_RGB"            count="1"  value="0x8009"> <!-- same as BLEND_EQUATION -->
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="VERTEX_ATTRIB_ARRAY_ENABLED"   count="1"  value="0x8622">
-        <size name="GetVertexAttribdv" mode="get"/>
-        <size name="GetVertexAttribfv" mode="get"/>
-        <size name="GetVertexAttribiv" mode="get"/>
-    </enum>
-    <enum name="VERTEX_ATTRIB_ARRAY_SIZE"      count="1"  value="0x8623">
-        <size name="GetVertexAttribdv" mode="get"/>
-        <size name="GetVertexAttribfv" mode="get"/>
-        <size name="GetVertexAttribiv" mode="get"/>
-    </enum>
-    <enum name="VERTEX_ATTRIB_ARRAY_STRIDE"     count="1" value="0x8624">
-        <size name="GetVertexAttribdv" mode="get"/>
-        <size name="GetVertexAttribfv" mode="get"/>
-        <size name="GetVertexAttribiv" mode="get"/>
-    </enum>
-    <enum name="VERTEX_ATTRIB_ARRAY_TYPE"      count="1"  value="0x8625">
-        <size name="GetVertexAttribdv" mode="get"/>
-        <size name="GetVertexAttribfv" mode="get"/>
-        <size name="GetVertexAttribiv" mode="get"/>
-    </enum>
-    <enum name="CURRENT_VERTEX_ATTRIB"         count="1"  value="0x8626">
-        <size name="GetVertexAttribdv" mode="get"/>
-        <size name="GetVertexAttribfv" mode="get"/>
-        <size name="GetVertexAttribiv" mode="get"/>
-    </enum>
-    <enum name="VERTEX_ATTRIB_ARRAY_POINTER"              value="0x8645"/>
-    <enum name="STENCIL_BACK_FUNC"             count="1"  value="0x8800">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_BACK_FAIL"             count="1"  value="0x8801">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_BACK_PASS_DEPTH_FAIL"  count="1"  value="0x8802">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="STENCIL_BACK_PASS_DEPTH_PASS"  count="1"  value="0x8803">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLEND_EQUATION_ALPHA"          count="1"  value="0x883D">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MAX_VERTEX_ATTRIBS"            count="1"  value="0x8869">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="VERTEX_ATTRIB_ARRAY_NORMALIZED"           value="0x886A"/>
-    <enum name="MAX_TEXTURE_IMAGE_UNITS"       count="1"  value="0x8872">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FRAGMENT_SHADER"                          value="0x8B30"/>
-    <enum name="VERTEX_SHADER"                            value="0x8B31"/>
-    <enum name="MAX_VERTEX_TEXTURE_IMAGE_UNITS"           value="0x8B4C"/>
-    <enum name="MAX_COMBINED_TEXTURE_IMAGE_UNITS"         value="0x8B4D"/>
-    <enum name="SHADER_TYPE"                              value="0x8B4F"/>
-    <enum name="FLOAT_VEC2"                               value="0x8B50"/>
-    <enum name="FLOAT_VEC3"                               value="0x8B51"/>
-    <enum name="FLOAT_VEC4"                               value="0x8B52"/>
-    <enum name="INT_VEC2"                                 value="0x8B53"/>
-    <enum name="INT_VEC3"                                 value="0x8B54"/>
-    <enum name="INT_VEC4"                                 value="0x8B55"/>
-    <enum name="BOOL"                                     value="0x8B56"/>
-    <enum name="BOOL_VEC2"                                value="0x8B57"/>
-    <enum name="BOOL_VEC3"                                value="0x8B58"/>
-    <enum name="BOOL_VEC4"                                value="0x8B59"/>
-    <enum name="FLOAT_MAT2"                               value="0x8B5A"/>
-    <enum name="FLOAT_MAT3"                               value="0x8B5B"/>
-    <enum name="FLOAT_MAT4"                               value="0x8B5C"/>
-    <enum name="SAMPLER_2D"                               value="0x8B5E"/>
-    <enum name="SAMPLER_CUBE"                             value="0x8B60"/>
-    <enum name="DELETE_STATUS"                            value="0x8B80"/>
-    <enum name="COMPILE_STATUS"                           value="0x8B81"/>
-    <enum name="LINK_STATUS"                              value="0x8B82"/>
-    <enum name="VALIDATE_STATUS"                          value="0x8B83"/>
-    <enum name="INFO_LOG_LENGTH"                          value="0x8B84"/>
-    <enum name="ATTACHED_SHADERS"                         value="0x8B85"/>
-    <enum name="ACTIVE_UNIFORMS"                          value="0x8B86"/>
-    <enum name="ACTIVE_UNIFORM_MAX_LENGTH"                value="0x8B87"/>
-    <enum name="SHADER_SOURCE_LENGTH"                     value="0x8B88"/>
-    <enum name="ACTIVE_ATTRIBUTES"                        value="0x8B89"/>
-    <enum name="ACTIVE_ATTRIBUTE_MAX_LENGTH"              value="0x8B8A"/>
-    <enum name="SHADING_LANGUAGE_VERSION"                 value="0x8B8C"/>
-    <enum name="CURRENT_PROGRAM"                          value="0x8B8D"/>
-    <enum name="STENCIL_BACK_REF"                         value="0x8CA3"/>
-    <enum name="STENCIL_BACK_VALUE_MASK"                  value="0x8CA4"/>
-    <enum name="STENCIL_BACK_WRITEMASK"                   value="0x8CA5"/>
-
-    <type name="char"    size="1"                  glx_name="CARD8"/>
-
-    <function name="AttachShader" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="shader" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="BindAttribLocation" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="index" type="GLuint"/>
-        <param name="name" type="const GLchar *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="BlendEquationSeparate" offset="assign">
-        <param name="modeRGB" type="GLenum"/>
-        <param name="modeA" type="GLenum"/>
-        <glx rop="4228"/>
-    </function>
-
-    <function name="CompileShader" offset="assign">
-        <param name="shader" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="CreateProgram" offset="assign">
-        <return type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="CreateShader" offset="assign">
-        <param name="type" type="GLenum"/>
-        <return type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="DeleteProgram" offset="assign">
-        <param name="program" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="DeleteShader" offset="assign">
-        <param name="program" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="DetachShader" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="shader" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="DisableVertexAttribArray" offset="assign">
-        <param name="index" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="EnableVertexAttribArray" offset="assign">
-        <param name="index" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetActiveAttrib" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="index" type="GLuint"/>
-        <param name="bufSize" type="GLsizei "/>
-        <param name="length" type="GLsizei *" output="true"/>
-        <param name="size" type="GLint *" output="true"/>
-        <param name="type" type="GLenum *" output="true"/>
-        <param name="name" type="GLchar *" output="true"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetActiveUniform" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="index" type="GLuint"/>
-        <param name="bufSize" type="GLsizei"/>
-        <param name="length" type="GLsizei *" output="true"/>
-        <param name="size" type="GLint *" output="true"/>
-        <param name="type" type="GLenum *" output="true"/>
-        <param name="name" type="GLchar *" output="true"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetAttachedShaders" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="maxCount" type="GLsizei"/>
-        <param name="count" type="GLsizei *" output="true"/>
-        <param name="obj" type="GLuint *" output="true"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetAttribLocation" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="name" type="const GLchar *"/>
-        <return type="GLint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetProgramiv" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetProgramInfoLog" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="bufSize" type="GLsizei"/>
-        <param name="length" type="GLsizei *"/>
-        <param name="infoLog" type="GLchar *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetShaderiv" offset="assign">
-        <param name="shader" type="GLuint"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetShaderInfoLog" offset="assign">
-        <param name="shader" type="GLuint"/>
-        <param name="bufSize" type="GLsizei"/>
-        <param name="length" type="GLsizei *"/>
-        <param name="infoLog" type="GLchar *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetShaderSource" offset="assign">
-        <param name="shader" type="GLuint"/>
-        <param name="bufSize" type="GLsizei"/>
-        <param name="length" type="GLsizei *" output="true"/>
-        <param name="source" type="GLchar *" output="true"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetUniformfv" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="location" type="GLint"/>
-        <param name="params" type="GLfloat *" output="true"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetUniformiv" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="location" type="GLint"/>
-        <param name="params" type="GLint *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetUniformLocation" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="name" type="const GLchar *"/>
-        <return type="GLint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetVertexAttribfv" offset="assign">
-        <param name="index" type="GLuint"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetVertexAttribiv" offset="assign">
-        <param name="index" type="GLuint"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="GetVertexAttribPointerv" offset="assign">
-        <param name="index" type="GLuint"/>
-        <param name="pname" type="GLenum"/>
-        <param name="pointer" type="GLvoid **" output="true"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="IsProgram" offset="assign">
-        <param name="program" type="GLuint"/>
-        <return type="GLboolean"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="IsShader" offset="assign">
-        <param name="shader" type="GLuint"/>
-        <return type="GLboolean"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="LinkProgram" offset="assign">
-        <param name="program" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="ShaderSource" offset="assign">
-        <param name="shader" type="GLuint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="string" type="const GLchar **"/>
-        <param name="length" type="const GLint *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="StencilFuncSeparate" offset="assign">
-        <param name="face" type="GLenum"/>
-        <param name="func" type="GLenum"/>
-        <param name="ref" type="GLint"/>
-        <param name="mask" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="StencilOpSeparate" offset="assign">
-        <param name="face" type="GLenum"/>
-        <param name="sfail" type="GLenum"/>
-        <param name="zfail" type="GLenum"/>
-        <param name="zpass" type="GLenum"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="StencilMaskSeparate" offset="assign">
-        <param name="face" type="GLenum"/>
-        <param name="mask" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform1f" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="v0" type="GLfloat"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform1fv" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="value" type="const GLfloat *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform1i" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="v0" type="GLint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform1iv" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="value" type="const GLint *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform2f" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="v0" type="GLfloat"/>
-        <param name="v1" type="GLfloat"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform2fv" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="value" type="const GLfloat *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform2i" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="v0" type="GLint"/>
-        <param name="v1" type="GLint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform2iv" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="value" type="const GLint *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform3f" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="v0" type="GLfloat"/>
-        <param name="v1" type="GLfloat"/>
-        <param name="v2" type="GLfloat"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform3fv" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="value" type="const GLfloat *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform3i" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="v0" type="GLint"/>
-        <param name="v1" type="GLint"/>
-        <param name="v2" type="GLint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform3iv" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="value" type="const GLint *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform4f" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="v0" type="GLfloat"/>
-        <param name="v1" type="GLfloat"/>
-        <param name="v2" type="GLfloat"/>
-        <param name="v3" type="GLfloat"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform4fv" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="value" type="const GLfloat *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform4i" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="v0" type="GLint"/>
-        <param name="v1" type="GLint"/>
-        <param name="v2" type="GLint"/>
-        <param name="v3" type="GLint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="Uniform4iv" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="value" type="const GLint *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="UniformMatrix2fv" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="transpose" type="GLboolean"/>
-        <param name="value" type="const GLfloat *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="UniformMatrix3fv" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="transpose" type="GLboolean"/>
-        <param name="value" type="const GLfloat *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="UniformMatrix4fv" offset="assign">
-        <param name="location" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="transpose" type="GLboolean"/>
-        <param name="value" type="const GLfloat *"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="UseProgram" offset="assign">
-        <param name="program" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="ValidateProgram" offset="assign">
-        <param name="program" type="GLuint"/>
-        <glx ignore="true"/>
-    </function>
-
-    <function name="VertexAttrib1f" offset="assign">
-        <param name="index" type="GLuint"/>
-        <param name="x" type="GLfloat"/>
-    </function>
-
-    <function name="VertexAttrib1fv" offset="assign">
-        <param name="index" type="GLuint"/>
-        <param name="v" type="const GLfloat *"/>
-    </function>
-
-    <function name="VertexAttrib2f" offset="assign">
-        <param name="index" type="GLuint"/>
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-    </function>
-
-    <function name="VertexAttrib2fv" offset="assign">
-        <param name="index" type="GLuint"/>
-        <param name="v" type="const GLfloat *"/>
-    </function>
-
-    <function name="VertexAttrib3f" offset="assign">
-        <param name="index" type="GLuint"/>
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-    </function>
-
-    <function name="VertexAttrib3fv" offset="assign">
-        <param name="index" type="GLuint"/>
-        <param name="v" type="const GLfloat *"/>
-    </function>
-
-    <function name="VertexAttrib4f" offset="assign">
-        <param name="index" type="GLuint"/>
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <param name="w" type="GLfloat"/>
-    </function>
-
-    <function name="VertexAttrib4fv" offset="assign">
-	<param name="index" type="GLuint"/>
-        <param name="v" type="const GLfloat *"/>
-    </function>
-
-    <function name="VertexAttribPointer" offset="assign">
-        <param name="index" type="GLuint"/>
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="normalized" type="GLboolean"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-    </function>
-</category>
-
-</OpenGLAPI>
diff --git a/src/mapi/glapi/gen-es/es1_API.xml b/src/mapi/glapi/gen-es/es1_API.xml
deleted file mode 100644
index 3428ae5616a..00000000000
--- a/src/mapi/glapi/gen-es/es1_API.xml
+++ /dev/null
@@ -1,1100 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd">
-
-<!-- OpenGL ES 1.x API -->
-
-<OpenGLAPI>
-
-<xi:include href="base1_API.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-
-<!-- core subset of OpenGL 1.3 defined in OpenGL ES 1.0 -->
-<category name="core1.0">
-    <!-- addition to base1.0 -->
-    <enum name="ADD"                                      value="0x0104"/>
-    <enum name="STACK_OVERFLOW"                           value="0x0503"/>
-    <enum name="STACK_UNDERFLOW"                          value="0x0504"/>
-    <enum name="EXP"                                      value="0x0800"/>
-    <enum name="EXP2"                                     value="0x0801"/>
-    <enum name="POINT_SMOOTH"                  count="1"  value="0x0B10">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LINE_SMOOTH"                   count="1"  value="0x0B20">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LIGHTING"                      count="1"  value="0x0B50">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LIGHT_MODEL_TWO_SIDE"          count="1"  value="0x0B52">
-        <size name="LightModelfv"/>
-        <size name="LightModeliv"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LIGHT_MODEL_AMBIENT"           count="4"  value="0x0B53">
-        <size name="LightModelfv"/>
-        <size name="LightModeliv"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="COLOR_MATERIAL"                count="1"  value="0x0B57">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FOG"                           count="1"  value="0x0B60">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FOG_DENSITY"                   count="1"  value="0x0B62">
-        <size name="Fogfv"/>
-        <size name="Fogiv"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FOG_START"                     count="1"  value="0x0B63">
-        <size name="Fogfv"/>
-        <size name="Fogiv"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FOG_END"                       count="1"  value="0x0B64">
-        <size name="Fogfv"/>
-        <size name="Fogiv"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FOG_MODE"                      count="1"  value="0x0B65">
-        <size name="Fogfv"/>
-        <size name="Fogiv"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FOG_COLOR"                     count="4"  value="0x0B66">
-        <size name="Fogfv"/>
-        <size name="Fogiv"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="NORMALIZE"                     count="1"  value="0x0BA1">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="ALPHA_TEST"                    count="1"  value="0x0BC0">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="PERSPECTIVE_CORRECTION_HINT"   count="1"  value="0x0C50">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="POINT_SMOOTH_HINT"             count="1"  value="0x0C51">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LINE_SMOOTH_HINT"              count="1"  value="0x0C52">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="POLYGON_SMOOTH_HINT"           count="1"  value="0x0C53">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FOG_HINT"                      count="1"  value="0x0C54">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MAX_LIGHTS"                    count="1"  value="0x0D31">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MAX_MODELVIEW_STACK_DEPTH"     count="1"  value="0x0D36">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MAX_PROJECTION_STACK_DEPTH"    count="1"  value="0x0D38">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MAX_TEXTURE_STACK_DEPTH"       count="1"  value="0x0D39">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="AMBIENT"                       count="4"  value="0x1200">
-        <size name="Materialfv"/>
-        <size name="Materialiv"/>
-        <size name="Lightfv"/>
-        <size name="Lightiv"/>
-        <size name="GetMaterialfv" mode="get"/>
-        <size name="GetMaterialiv" mode="get"/>
-        <size name="GetLightfv" mode="get"/>
-        <size name="GetLightiv" mode="get"/>
-    </enum>
-    <enum name="DIFFUSE"                       count="4"  value="0x1201">
-        <size name="Materialfv"/>
-        <size name="Materialiv"/>
-        <size name="Lightfv"/>
-        <size name="Lightiv"/>
-        <size name="GetMaterialfv" mode="get"/>
-        <size name="GetMaterialiv" mode="get"/>
-        <size name="GetLightfv" mode="get"/>
-        <size name="GetLightiv" mode="get"/>
-    </enum>
-    <enum name="SPECULAR"                      count="4"  value="0x1202">
-        <size name="Materialfv"/>
-        <size name="Materialiv"/>
-        <size name="Lightfv"/>
-        <size name="Lightiv"/>
-        <size name="GetMaterialfv" mode="get"/>
-        <size name="GetMaterialiv" mode="get"/>
-        <size name="GetLightfv" mode="get"/>
-        <size name="GetLightiv" mode="get"/>
-    </enum>
-    <enum name="POSITION"                      count="4"  value="0x1203">
-        <size name="Lightfv"/>
-        <size name="Lightiv"/>
-        <size name="GetLightfv" mode="get"/>
-        <size name="GetLightiv" mode="get"/>
-    </enum>
-    <enum name="SPOT_DIRECTION"                count="3"  value="0x1204">
-        <size name="Lightfv"/>
-        <size name="Lightiv"/>
-        <size name="GetLightfv" mode="get"/>
-        <size name="GetLightiv" mode="get"/>
-    </enum>
-    <enum name="SPOT_EXPONENT"                 count="1"  value="0x1205">
-        <size name="Lightfv"/>
-        <size name="Lightiv"/>
-        <size name="GetLightfv" mode="get"/>
-        <size name="GetLightiv" mode="get"/>
-    </enum>
-    <enum name="SPOT_CUTOFF"                   count="1"  value="0x1206">
-        <size name="Lightfv"/>
-        <size name="Lightiv"/>
-        <size name="GetLightfv" mode="get"/>
-        <size name="GetLightiv" mode="get"/>
-    </enum>
-    <enum name="CONSTANT_ATTENUATION"          count="1"  value="0x1207">
-        <size name="Lightfv"/>
-        <size name="Lightiv"/>
-        <size name="GetLightfv" mode="get"/>
-        <size name="GetLightiv" mode="get"/>
-    </enum>
-    <enum name="LINEAR_ATTENUATION"            count="1"  value="0x1208">
-        <size name="Lightfv"/>
-        <size name="Lightiv"/>
-        <size name="GetLightfv" mode="get"/>
-        <size name="GetLightiv" mode="get"/>
-    </enum>
-    <enum name="QUADRATIC_ATTENUATION"         count="1"  value="0x1209">
-        <size name="Lightfv"/>
-        <size name="Lightiv"/>
-        <size name="GetLightfv" mode="get"/>
-        <size name="GetLightiv" mode="get"/>
-    </enum>
-    <enum name="CLEAR"                                    value="0x1500"/>
-    <enum name="AND"                                      value="0x1501"/>
-    <enum name="AND_REVERSE"                              value="0x1502"/>
-    <enum name="COPY"                                     value="0x1503"/>
-    <enum name="AND_INVERTED"                             value="0x1504"/>
-    <enum name="NOOP"                                     value="0x1505"/>
-    <enum name="XOR"                                      value="0x1506"/>
-    <enum name="OR"                                       value="0x1507"/>
-    <enum name="NOR"                                      value="0x1508"/>
-    <enum name="EQUIV"                                    value="0x1509"/>
-    <enum name="OR_REVERSE"                               value="0x150B"/>
-    <enum name="COPY_INVERTED"                            value="0x150C"/>
-    <enum name="OR_INVERTED"                              value="0x150D"/>
-    <enum name="NAND"                                     value="0x150E"/>
-    <enum name="SET"                                      value="0x150F"/>
-    <enum name="EMISSION"                      count="4"  value="0x1600">
-        <size name="Materialfv"/>
-        <size name="Materialiv"/>
-        <size name="GetMaterialfv" mode="get"/>
-        <size name="GetMaterialiv" mode="get"/>
-    </enum>
-    <enum name="SHININESS"                     count="1"  value="0x1601">
-        <size name="Materialfv"/>
-        <size name="Materialiv"/>
-        <size name="GetMaterialfv" mode="get"/>
-        <size name="GetMaterialiv" mode="get"/>
-    </enum>
-    <enum name="AMBIENT_AND_DIFFUSE"           count="4"  value="0x1602">
-        <size name="Materialfv"/>
-        <size name="Materialiv"/>
-        <size name="GetMaterialfv" mode="get"/>
-        <size name="GetMaterialiv" mode="get"/>
-    </enum>
-    <enum name="MODELVIEW"                                value="0x1700"/>
-    <enum name="PROJECTION"                               value="0x1701"/>
-    <enum name="FLAT"                                     value="0x1D00"/>
-    <enum name="SMOOTH"                                   value="0x1D01"/>
-    <enum name="MODULATE"                                 value="0x2100"/>
-    <enum name="DECAL"                                    value="0x2101"/>
-    <enum name="TEXTURE_ENV_MODE"              count="1"  value="0x2200">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_ENV_COLOR"             count="4"  value="0x2201">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_ENV"                              value="0x2300"/>
-    <enum name="LIGHT0"                        count="1"  value="0x4000">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LIGHT1"                        count="1"  value="0x4001">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LIGHT2"                        count="1"  value="0x4002">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LIGHT3"                        count="1"  value="0x4003">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LIGHT4"                        count="1"  value="0x4004">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LIGHT5"                        count="1"  value="0x4005">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LIGHT6"                        count="1"  value="0x4006">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LIGHT7"                        count="1"  value="0x4007">
-        <size name="Get" mode="get"/>
-    </enum>
-
-    <function name="AlphaFunc" offset="240">
-        <param name="func" type="GLenum"/>
-        <param name="ref" type="GLclampf"/>
-        <glx rop="159"/>
-    </function>
-
-    <function name="Color4f" offset="29" vectorequiv="Color4fv">
-        <param name="red" type="GLfloat"/>
-        <param name="green" type="GLfloat"/>
-        <param name="blue" type="GLfloat"/>
-        <param name="alpha" type="GLfloat"/>
-    </function>
-
-    <function name="Fogf" offset="153">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="80"/>
-    </function>
-
-    <function name="Fogfv" offset="154">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="81"/>
-    </function>
-
-    <function name="Lightf" offset="159">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="86"/>
-    </function>
-
-    <function name="Lightfv" offset="160">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="87"/>
-    </function>
-
-    <function name="LightModelf" offset="163">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="90"/>
-    </function>
-
-    <function name="LightModelfv" offset="164">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="91"/>
-    </function>
-
-    <function name="LoadIdentity" offset="290">
-        <glx rop="176"/>
-    </function>
-
-    <function name="LoadMatrixf" offset="291">
-        <param name="m" type="const GLfloat *" count="16"/>
-        <glx rop="177"/>
-    </function>
-
-    <function name="LogicOp" offset="242">
-        <param name="opcode" type="GLenum"/>
-        <glx rop="161"/>
-    </function>
-
-    <function name="Materialf" offset="169">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="96"/>
-    </function>
-
-    <function name="Materialfv" offset="170">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="97"/>
-    </function>
-
-    <function name="MatrixMode" offset="293">
-        <param name="mode" type="GLenum"/>
-        <glx rop="179"/>
-    </function>
-
-    <function name="MultMatrixf" offset="294">
-        <param name="m" type="const GLfloat *" count="16"/>
-        <glx rop="180"/>
-    </function>
-
-    <function name="Normal3f" offset="56" vectorequiv="Normal3fv">
-        <param name="nx" type="GLfloat"/>
-        <param name="ny" type="GLfloat"/>
-        <param name="nz" type="GLfloat"/>
-    </function>
-
-    <function name="PointSize" offset="173">
-        <param name="size" type="GLfloat"/>
-        <glx rop="100"/>
-    </function>
-
-    <function name="PopMatrix" offset="297">
-        <glx rop="183"/>
-    </function>
-
-    <function name="PushMatrix" offset="298">
-        <glx rop="184"/>
-    </function>
-
-    <function name="Rotatef" offset="300">
-        <param name="angle" type="GLfloat"/>
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <glx rop="186"/>
-    </function>
-
-    <function name="Scalef" offset="302">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <glx rop="188"/>
-    </function>
-
-    <function name="ShadeModel" offset="177">
-        <param name="mode" type="GLenum"/>
-        <glx rop="104"/>
-    </function>
-
-    <function name="TexEnvf" offset="184">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="111"/>
-    </function>
-
-    <function name="TexEnvfv" offset="185">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="112"/>
-    </function>
-
-    <function name="TexImage2D" offset="183">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_format="format" img_type="type" img_target="target" img_send_null="true" img_pad_dimensions="true"/>
-        <glx rop="110" large="true"/>
-    </function>
-
-    <function name="Translatef" offset="304">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <glx rop="190"/>
-    </function>
-
-    <!-- addition to base1.1 -->
-    <enum name="COLOR_LOGIC_OP"                           value="0x0BF2"/>
-    <enum name="VERTEX_ARRAY"                  count="1"  value="0x8074">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="NORMAL_ARRAY"                  count="1"  value="0x8075">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="COLOR_ARRAY"                   count="1"  value="0x8076">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_COORD_ARRAY"           count="1"  value="0x8078">
-        <size name="Get" mode="get"/>
-    </enum>
-
-    <function name="ColorPointer" offset="308">
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="DisableClientState" offset="309">
-        <param name="array" type="GLenum"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="EnableClientState" offset="313">
-        <param name="array" type="GLenum"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="NormalPointer" offset="318">
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="TexCoordPointer" offset="320">
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="VertexPointer" offset="321">
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <!-- addition to base1.2 -->
-    <enum name="SMOOTH_POINT_SIZE_RANGE"       count="2"  value="0x0B12">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="SMOOTH_LINE_WIDTH_RANGE"       count="2"  value="0x0B22">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="RESCALE_NORMAL"                count="1"  value="0x803A">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MAX_ELEMENTS_VERTICES"         count="1"  value="0x80E8">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MAX_ELEMENTS_INDICES"          count="1"  value="0x80E9">
-        <size name="Get" mode="get"/>
-    </enum>
-
-    <!-- addition to base1.3 -->
-    <enum name="MULTISAMPLE"                   count="1"  value="0x809D">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="SAMPLE_ALPHA_TO_ONE"           count="1"  value="0x809F">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MAX_TEXTURE_UNITS"             count="1"  value="0x84E2">
-        <size name="Get" mode="get"/>
-    </enum>
-
-    <function name="ClientActiveTexture" offset="375">
-        <param name="texture" type="GLenum"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="MultiTexCoord4f" offset="402" vectorequiv="MultiTexCoord4fv">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLfloat"/>
-        <param name="t" type="GLfloat"/>
-        <param name="r" type="GLfloat"/>
-        <param name="q" type="GLfloat"/>
-    </function>
-</category>
-
-<!-- core subset of OpenGL 1.5 defined in OpenGL ES 1.1 -->
-<category name="core1.1">
-    <!-- addition to base1.0 -->
-    <enum name="CURRENT_COLOR"                 count="4"  value="0x0B00">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="CURRENT_NORMAL"                count="3"  value="0x0B02">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="CURRENT_TEXTURE_COORDS"        count="4"  value="0x0B03">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="POINT_SIZE"                    count="1"  value="0x0B11">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="SHADE_MODEL"                   count="1"  value="0x0B54">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MATRIX_MODE"                   count="1"  value="0x0BA0">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MODELVIEW_STACK_DEPTH"         count="1"  value="0x0BA3">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="PROJECTION_STACK_DEPTH"        count="1"  value="0x0BA4">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_STACK_DEPTH"           count="1"  value="0x0BA5">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MODELVIEW_MATRIX"              count="16" value="0x0BA6">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="PROJECTION_MATRIX"             count="16" value="0x0BA7">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_MATRIX"                count="16" value="0x0BA8">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="ALPHA_TEST_FUNC"               count="1"  value="0x0BC1">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="ALPHA_TEST_REF"                count="1"  value="0x0BC2">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLEND_DST"                     count="1"  value="0x0BE0">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLEND_SRC"                     count="1"  value="0x0BE1">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="LOGIC_OP_MODE"                 count="1"  value="0x0BF0">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="ALPHA_SCALE"                   count="1"  value="0x0D1C">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="MAX_CLIP_PLANES"               count="1"  value="0x0D32">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="CLIP_PLANE0"                   count="1"  value="0x3000">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="CLIP_PLANE1"                   count="1"  value="0x3001">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="CLIP_PLANE2"                   count="1"  value="0x3002">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="CLIP_PLANE3"                   count="1"  value="0x3003">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="CLIP_PLANE4"                   count="1"  value="0x3004">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="CLIP_PLANE5"                   count="1"  value="0x3005">
-        <size name="Get" mode="get"/>
-    </enum>
-
-    <function name="Color4ub" offset="35" vectorequiv="Color4ubv">
-        <param name="red" type="GLubyte"/>
-        <param name="green" type="GLubyte"/>
-        <param name="blue" type="GLubyte"/>
-        <param name="alpha" type="GLubyte"/>
-    </function>
-
-    <function name="GetLightfv" offset="264">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="118"/>
-    </function>
-
-    <function name="GetMaterialfv" offset="269">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="123"/>
-    </function>
-
-    <function name="GetTexEnvfv" offset="276">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="130"/>
-    </function>
-
-    <function name="GetTexEnviv" offset="277">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="131"/>
-    </function>
-
-    <function name="TexEnvi" offset="186">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="113"/>
-    </function>
-
-    <function name="TexEnviv" offset="187">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="114"/>
-    </function>
-
-    <!-- addition to base1.1 -->
-    <enum name="VERTEX_ARRAY_SIZE"             count="1"  value="0x807A">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="VERTEX_ARRAY_TYPE"             count="1"  value="0x807B">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="VERTEX_ARRAY_STRIDE"           count="1"  value="0x807C">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="NORMAL_ARRAY_TYPE"             count="1"  value="0x807E">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="NORMAL_ARRAY_STRIDE"           count="1"  value="0x807F">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="COLOR_ARRAY_SIZE"              count="1"  value="0x8081">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="COLOR_ARRAY_TYPE"              count="1"  value="0x8082">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="COLOR_ARRAY_STRIDE"            count="1"  value="0x8083">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_COORD_ARRAY_SIZE"      count="1"  value="0x8088">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_COORD_ARRAY_TYPE"      count="1"  value="0x8089">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_COORD_ARRAY_STRIDE"    count="1"  value="0x808A">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="VERTEX_ARRAY_POINTER"                     value="0x808E"/>
-    <enum name="NORMAL_ARRAY_POINTER"                     value="0x808F"/>
-    <enum name="COLOR_ARRAY_POINTER"                      value="0x8090"/>
-    <enum name="TEXTURE_COORD_ARRAY_POINTER"              value="0x8092"/>
-
-    <function name="GetPointerv" offset="329">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLvoid **" output="true"/>
-        <glx handcode="true"/>
-    </function>
-
-    <!-- addition to base1.2 -->
-
-    <!-- addition to base1.3 -->
-    <enum name="CLIENT_ACTIVE_TEXTURE"         count="1"  value="0x84E1">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="SUBTRACT"                                 value="0x84E7"/>
-    <enum name="COMBINE"                                  value="0x8570"/>
-    <enum name="COMBINE_RGB"                   count="1"  value="0x8571">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-    </enum>
-    <enum name="COMBINE_ALPHA"                 count="1"  value="0x8572">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-    </enum>
-    <enum name="RGB_SCALE"                     count="1"  value="0x8573">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-    </enum>
-    <enum name="ADD_SIGNED"                               value="0x8574"/>
-    <enum name="INTERPOLATE"                              value="0x8575"/>
-    <enum name="CONSTANT"                                 value="0x8576"/>
-    <enum name="PRIMARY_COLOR"                            value="0x8577"/>
-    <enum name="PREVIOUS"                                 value="0x8578"/>
-    <enum name="OPERAND0_RGB"                  count="1"  value="0x8590">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-    </enum>
-    <enum name="OPERAND1_RGB"                  count="1"  value="0x8591">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-    </enum>
-    <enum name="OPERAND2_RGB"                  count="1"  value="0x8592">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-    </enum>
-    <enum name="OPERAND0_ALPHA"                count="1"  value="0x8598">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-    </enum>
-    <enum name="OPERAND1_ALPHA"                count="1"  value="0x8599">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-    </enum>
-    <enum name="OPERAND2_ALPHA"                count="1"  value="0x859A">
-        <size name="TexEnvfv"/>
-        <size name="TexEnviv"/>
-        <size name="GetTexEnvfv" mode="get"/>
-        <size name="GetTexEnviv" mode="get"/>
-    </enum>
-    <enum name="DOT3_RGB"                                 value="0x86AE"/>
-    <enum name="DOT3_RGBA"                                value="0x86AF"/>
-
-    <!-- addition to base1.4 -->
-    <enum name="POINT_SIZE_MIN"                count="1"  value="0x8126">
-        <size name="PointParameterfv"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="POINT_SIZE_MAX"                count="1"  value="0x8127">
-        <size name="PointParameterfv"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="POINT_FADE_THRESHOLD_SIZE"     count="1"  value="0x8128">
-        <size name="PointParameterfv"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="POINT_DISTANCE_ATTENUATION"    count="3"  value="0x8129">
-        <size name="PointParameterfv"/>
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="GENERATE_MIPMAP"               count="1"  value="0x8191">
-        <size name="TexParameterfv"/>
-        <size name="TexParameteriv"/>
-        <size name="GetTexParameterfv" mode="get"/>
-        <size name="GetTexParameteriv" mode="get"/>
-    </enum>
-
-    <function name="PointParameterf" offset="assign">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="2065"/>
-    </function>
-
-    <function name="PointParameterfv" offset="assign">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="2066"/>
-    </function>
-
-    <!-- addition to base1.5 -->
-    <enum name="SRC0_RGB"                                 value="0x8580"/>
-    <enum name="SRC1_RGB"                                 value="0x8581"/>
-    <enum name="SRC2_RGB"                                 value="0x8582"/>
-    <enum name="SRC0_ALPHA"                               value="0x8588"/>
-    <enum name="SRC1_ALPHA"                               value="0x8589"/>
-    <enum name="SRC2_ALPHA"                               value="0x858A"/>
-    <enum name="VERTEX_ARRAY_BUFFER_BINDING"   count="1"  value="0x8896">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="NORMAL_ARRAY_BUFFER_BINDING"   count="1"  value="0x8897">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="COLOR_ARRAY_BUFFER_BINDING"    count="1"  value="0x8898">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_COORD_ARRAY_BUFFER_BINDING" count="1" value="0x889A">
-        <size name="Get" mode="get"/>
-    </enum>
-</category>
-
-<!-- OpenGL ES 1.0 -->
-<category name="es1.0">
-    <!-- addition to core1.0 -->
-
-    <!-- from GL_OES_fixed_point -->
-    <enum name="FIXED"                                    value="0x140C"/>
-
-    <type name="fixed"   size="4"                                    />
-    <type name="clampx"  size="4"                                    />
-
-    <function name="AlphaFuncx" offset="assign">
-        <param name="func" type="GLenum"/>
-        <param name="ref" type="GLclampx"/>
-    </function>
-
-    <function name="ClearColorx" offset="assign">
-        <param name="red" type="GLclampx"/>
-        <param name="green" type="GLclampx"/>
-        <param name="blue" type="GLclampx"/>
-        <param name="alpha" type="GLclampx"/>
-    </function>
-
-    <function name="ClearDepthx" offset="assign">
-        <param name="depth" type="GLclampx"/>
-    </function>
-
-    <function name="Color4x" offset="assign">
-        <param name="red" type="GLfixed"/>
-        <param name="green" type="GLfixed"/>
-        <param name="blue" type="GLfixed"/>
-        <param name="alpha" type="GLfixed"/>
-    </function>
-
-    <function name="DepthRangex" offset="assign">
-        <param name="zNear" type="GLclampx"/>
-        <param name="zFar" type="GLclampx"/>
-    </function>
-
-    <function name="Fogx" offset="assign">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="Fogxv" offset="assign">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-
-    <function name="Frustumx" offset="assign">
-        <param name="left" type="GLfixed"/>
-        <param name="right" type="GLfixed"/>
-        <param name="bottom" type="GLfixed"/>
-        <param name="top" type="GLfixed"/>
-        <param name="zNear" type="GLfixed"/>
-        <param name="zFar" type="GLfixed"/>
-    </function>
-
-    <function name="LightModelx" offset="assign">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="LightModelxv" offset="assign">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-
-    <function name="Lightx" offset="assign">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="Lightxv" offset="assign">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-
-    <function name="LineWidthx" offset="assign">
-        <param name="width" type="GLfixed"/>
-    </function>
-
-    <function name="LoadMatrixx" offset="assign">
-        <param name="m" type="const GLfixed *" count="16"/>
-    </function>
-
-    <function name="Materialx" offset="assign">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="Materialxv" offset="assign">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-
-    <function name="MultMatrixx" offset="assign">
-        <param name="m" type="const GLfixed *" count="16"/>
-    </function>
-
-    <function name="MultiTexCoord4x" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLfixed"/>
-        <param name="t" type="GLfixed"/>
-        <param name="r" type="GLfixed"/>
-        <param name="q" type="GLfixed"/>
-    </function>
-
-    <function name="Normal3x" offset="assign">
-        <param name="nx" type="GLfixed"/>
-        <param name="ny" type="GLfixed"/>
-        <param name="nz" type="GLfixed"/>
-    </function>
-
-    <function name="Orthox" offset="assign">
-        <param name="left" type="GLfixed"/>
-        <param name="right" type="GLfixed"/>
-        <param name="bottom" type="GLfixed"/>
-        <param name="top" type="GLfixed"/>
-        <param name="zNear" type="GLfixed"/>
-        <param name="zFar" type="GLfixed"/>
-    </function>
-
-    <function name="PointSizex" offset="assign">
-        <param name="size" type="GLfixed"/>
-    </function>
-
-    <function name="PolygonOffsetx" offset="assign">
-        <param name="factor" type="GLfixed"/>
-        <param name="units" type="GLfixed"/>
-    </function>
-
-    <function name="Rotatex" offset="assign">
-        <param name="angle" type="GLfixed"/>
-        <param name="x" type="GLfixed"/>
-        <param name="y" type="GLfixed"/>
-        <param name="z" type="GLfixed"/>
-    </function>
-
-    <function name="SampleCoveragex" offset="assign">
-        <param name="value" type="GLclampx"/>
-        <param name="invert" type="GLboolean"/>
-    </function>
-
-    <function name="Scalex" offset="assign">
-        <param name="x" type="GLfixed"/>
-        <param name="y" type="GLfixed"/>
-        <param name="z" type="GLfixed"/>
-    </function>
-
-    <function name="TexEnvx" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="TexEnvxv" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-
-    <function name="TexParameterx" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="Translatex" offset="assign">
-        <param name="x" type="GLfixed"/>
-        <param name="y" type="GLfixed"/>
-        <param name="z" type="GLfixed"/>
-    </function>
-
-    <!-- from GL_OES_single_precision -->
-    <function name="ClearDepthf" offset="assign">
-        <param name="depth" type="GLclampf"/>
-    </function>
-
-    <function name="DepthRangef" offset="assign">
-        <param name="zNear" type="GLclampf"/>
-        <param name="zFar" type="GLclampf"/>
-    </function>
-
-    <function name="Frustumf" offset="assign">
-        <param name="left" type="GLfloat"/>
-        <param name="right" type="GLfloat"/>
-        <param name="bottom" type="GLfloat"/>
-        <param name="top" type="GLfloat"/>
-        <param name="zNear" type="GLfloat"/>
-        <param name="zFar" type="GLfloat"/>
-    </function>
-
-    <function name="Orthof" offset="assign">
-        <param name="left" type="GLfloat"/>
-        <param name="right" type="GLfloat"/>
-        <param name="bottom" type="GLfloat"/>
-        <param name="top" type="GLfloat"/>
-        <param name="zNear" type="GLfloat"/>
-        <param name="zFar" type="GLfloat"/>
-    </function>
-</category>
-
-<!-- OpenGL ES 1.1 -->
-<category name="es1.1">
-    <!-- addition to core1.1 -->
-
-    <!-- from GL_OES_fixed_point -->
-    <function name="ClipPlanex" offset="assign">
-        <param name="plane" type="GLenum"/>
-        <param name="equation" type="const GLfixed *" count="4"/>
-    </function>
-
-    <function name="GetClipPlanex" offset="assign">
-        <param name="plane" type="GLenum"/>
-        <param name="equation" type="GLfixed *" output="true" count="4"/>
-    </function>
-
-    <function name="GetFixedv" offset="assign">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfixed *" output="true" variable_param="pname"/>
-    </function>
-
-    <function name="GetLightxv" offset="assign">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfixed *" output="true" variable_param="pname"/>
-    </function>
-
-    <function name="GetMaterialxv" offset="assign">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfixed *" output="true" variable_param="pname"/>
-    </function>
-
-    <function name="GetTexEnvxv" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfixed *" output="true" variable_param="pname"/>
-    </function>
-
-    <function name="GetTexParameterxv" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfixed *" output="true" variable_param="pname"/>
-    </function>
-
-    <function name="PointParameterx" offset="assign">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="PointParameterxv" offset="assign">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *"/>
-    </function>
-
-    <function name="TexParameterxv" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-
-    <!-- from GL_OES_matrix_get -->
-    <enum name="MODELVIEW_MATRIX_FLOAT_AS_INT_BITS_OES"   value="0x898D"/>
-    <enum name="PROJECTION_MATRIX_FLOAT_AS_INT_BITS_OES"  value="0x898E"/>
-    <enum name="TEXTURE_MATRIX_FLOAT_AS_INT_BITS_OES"	  value="0x898F"/>
-
-    <!-- from GL_OES_single_precision -->
-    <function name="ClipPlanef" offset="assign">
-        <param name="plane" type="GLenum"/>
-        <param name="equation" type="const GLfloat *" count="4"/>
-    </function>
-
-    <function name="GetClipPlanef" offset="assign">
-        <param name="plane" type="GLenum"/>
-        <param name="equation" type="GLfloat *" output="true" count="4"/>
-    </function>
-</category>
-
-<xi:include href="es1_EXT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-<xi:include href="es1_COMPAT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-
-</OpenGLAPI>
diff --git a/src/mapi/glapi/gen-es/es1_COMPAT.xml b/src/mapi/glapi/gen-es/es1_COMPAT.xml
deleted file mode 100644
index 096cea88d6f..00000000000
--- a/src/mapi/glapi/gen-es/es1_COMPAT.xml
+++ /dev/null
@@ -1,135 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd">
-
-<OpenGLAPI>
-
-<!-- This file defines the functions that are needed by Mesa.  It
-     makes sure the generated glapi headers are compatible with Mesa.
-     It mainly consists of missing functions and aliases in OpenGL ES.
--->
-
-<xi:include href="es_COMPAT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-
-<!-- except for those defined by es_COMPAT.xml, these are also needed -->
-<category name="compat">
-    <!-- OpenGL 1.0 -->
-    <function name="TexGenf" alias="TexGenfOES" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="117"/>
-    </function>
-
-    <function name="TexGenfv" alias="TexGenfvOES" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="118"/>
-    </function>
-
-    <function name="TexGeni" alias="TexGeniOES" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="119"/>
-    </function>
-
-    <function name="TexGeniv" alias="TexGenivOES" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="120"/>
-    </function>
-
-    <function name="GetTexGenfv" alias="GetTexGenfvOES" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="133"/>
-    </function>
-
-    <function name="GetTexGeniv" alias="GetTexGenivOES" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="134"/>
-    </function>
-
-    <!-- OpenGL 1.2 -->
-    <function name="BlendColor" offset="336" static_dispatch="false">
-        <param name="red" type="GLclampf"/>
-        <param name="green" type="GLclampf"/>
-        <param name="blue" type="GLclampf"/>
-        <param name="alpha" type="GLclampf"/>
-        <glx rop="4096"/>
-    </function>
-
-    <function name="BlendEquation" alias="BlendEquationOES" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <glx rop="4097"/>
-    </function>
-
-    <function name="TexImage3D" offset="371" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="depth" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_format="format" img_type="type" img_target="target" img_null_flag="true" img_pad_dimensions="true"/>
-        <glx rop="4114" large="true"/>
-    </function>
-
-    <function name="TexSubImage3D" offset="372" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="zoffset" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="depth" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="UNUSED" type="GLuint" padding="true"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_xoff="xoffset" img_yoff="yoffset" img_zoff="zoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/>
-        <glx rop="4115" large="true"/>
-    </function>
-
-    <function name="CopyTexSubImage3D" offset="373" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="zoffset" type="GLint"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <glx rop="4123"/>
-    </function>
-
-    <!-- GL_ARB_multitexture -->
-    <function name="ActiveTextureARB" alias="ActiveTexture" static_dispatch="false">
-        <param name="texture" type="GLenum"/>
-        <glx rop="197"/>
-    </function>
-
-    <function name="ClientActiveTextureARB" alias="ClientActiveTexture" static_dispatch="false">
-        <param name="texture" type="GLenum"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="MultiTexCoord4fARB" alias="MultiTexCoord4f" vectorequiv="MultiTexCoord4fvARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLfloat"/>
-        <param name="t" type="GLfloat"/>
-        <param name="r" type="GLfloat"/>
-        <param name="q" type="GLfloat"/>
-    </function>
-</category>
-
-</OpenGLAPI>
diff --git a/src/mapi/glapi/gen-es/es1_EXT.xml b/src/mapi/glapi/gen-es/es1_EXT.xml
deleted file mode 100644
index c1e86373d8b..00000000000
--- a/src/mapi/glapi/gen-es/es1_EXT.xml
+++ /dev/null
@@ -1,699 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd">
-
-<!-- OpenGL ES 1.x extensions -->
-
-<OpenGLAPI>
-
-<xi:include href="es_EXT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-
-<!-- part of es1.1 extension pack -->
-<category name="GL_OES_blend_equation_separate" number="1">
-    <enum name="BLEND_EQUATION_RGB_OES"        count="1"  value="0x8009">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLEND_EQUATION_ALPHA_OES"      count="1"  value="0x883D">
-        <size name="Get" mode="get"/>
-    </enum>
-
-    <function name="BlendEquationSeparateOES" offset="assign">
-        <param name="modeRGB" type="GLenum"/>
-        <param name="modeA" type="GLenum"/>
-        <glx rop="4228"/>
-    </function>
-</category>
-
-<!-- part of es1.1 extension pack -->
-<category name="GL_OES_blend_func_separate" number="2">
-    <enum name="BLEND_DST_RGB_OES"             count="1"  value="0x80C8">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLEND_SRC_RGB_OES"             count="1"  value="0x80C9">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLEND_DST_ALPHA_OES"           count="1"  value="0x80CA">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLEND_SRC_ALPHA_OES"           count="1"  value="0x80CB">
-        <size name="Get" mode="get"/>
-    </enum>
-
-    <function name="BlendFuncSeparateOES" offset="assign">
-        <param name="sfactorRGB" type="GLenum"/>
-        <param name="dfactorRGB" type="GLenum"/>
-        <param name="sfactorAlpha" type="GLenum"/>
-        <param name="dfactorAlpha" type="GLenum"/>
-        <glx rop="4134"/>
-    </function>
-</category>
-
-<!-- part of es1.1 extension pack -->
-<category name="GL_OES_blend_subtract" number="3">
-    <enum name="FUNC_ADD_OES"                             value="0x8006"/>
-    <enum name="BLEND_EQUATION_OES"            count="1"  value="0x8009">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FUNC_SUBTRACT_OES"                        value="0x800A"/>
-    <enum name="FUNC_REVERSE_SUBTRACT_OES"                value="0x800B"/>
-
-    <function name="BlendEquationOES" offset="337">
-        <param name="mode" type="GLenum"/>
-        <glx rop="4097"/>
-    </function>
-</category>
-
-<!-- core addition to es1.0 and later -->
-<category name="GL_OES_byte_coordinates" number="4">
-    <enum name="BYTE"                                     value="0x1400"/>
-</category>
-
-<!-- optional for es1.1 -->
-<category name="GL_OES_draw_texture" number="7">
-    <enum name="TEXTURE_CROP_RECT_OES"                    value="0x8B9D"/>
-
-    <function name="DrawTexiOES" offset="assign">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="z" type="GLint"/>
-        <param name="width" type="GLint"/>
-        <param name="height" type="GLint"/>
-    </function>
-
-    <function name="DrawTexivOES" offset="assign">
-        <param name="coords" type="const GLint *" count="5"/>
-    </function>
-
-    <function name="DrawTexfOES" offset="assign">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <param name="width" type="GLfloat"/>
-        <param name="height" type="GLfloat"/>
-    </function>
-
-    <function name="DrawTexfvOES" offset="assign">
-        <param name="coords" type="const GLfloat *" count="5"/>
-    </function>
-
-    <function name="DrawTexsOES" offset="assign">
-        <param name="x" type="GLshort"/>
-        <param name="y" type="GLshort"/>
-        <param name="z" type="GLshort"/>
-        <param name="width" type="GLshort"/>
-        <param name="height" type="GLshort"/>
-    </function>
-
-    <function name="DrawTexsvOES" offset="assign">
-        <param name="coords" type="const GLshort *" count="5"/>
-    </function>
-
-    <function name="DrawTexxOES" offset="assign">
-        <param name="x" type="GLfixed"/>
-        <param name="y" type="GLfixed"/>
-        <param name="z" type="GLfixed"/>
-        <param name="width" type="GLfixed"/>
-        <param name="height" type="GLfixed"/>
-    </function>
-
-    <function name="DrawTexxvOES" offset="assign">
-        <param name="coords" type="const GLfixed *" count="5"/>
-    </function>
-
-    <!-- TexParameter{ifx}v is skipped here -->
-</category>
-
-<!-- core addition to es1.0 and later -->
-<category name="GL_OES_fixed_point" number="9">
-    <enum name="FIXED_OES"                                value="0x140C"/>
-
-    <!-- additon to es1.0 -->
-    <function name="AlphaFuncxOES" alias="AlphaFuncx">
-        <param name="func" type="GLenum"/>
-        <param name="ref" type="GLclampx"/>
-    </function>
-
-    <function name="ClearColorxOES" alias="ClearColorx">
-        <param name="red" type="GLclampx"/>
-        <param name="green" type="GLclampx"/>
-        <param name="blue" type="GLclampx"/>
-        <param name="alpha" type="GLclampx"/>
-    </function>
-
-    <function name="ClearDepthxOES" alias="ClearDepthx">
-        <param name="depth" type="GLclampx"/>
-    </function>
-
-    <function name="Color4xOES" alias="Color4x">
-        <param name="red" type="GLfixed"/>
-        <param name="green" type="GLfixed"/>
-        <param name="blue" type="GLfixed"/>
-        <param name="alpha" type="GLfixed"/>
-    </function>
-
-    <function name="DepthRangexOES" alias="DepthRangex">
-        <param name="zNear" type="GLclampx"/>
-        <param name="zFar" type="GLclampx"/>
-    </function>
-
-    <function name="FogxOES" alias="Fogx">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="FogxvOES" alias="Fogxv">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-
-    <function name="FrustumxOES" alias="Frustumx">
-        <param name="left" type="GLfixed"/>
-        <param name="right" type="GLfixed"/>
-        <param name="bottom" type="GLfixed"/>
-        <param name="top" type="GLfixed"/>
-        <param name="zNear" type="GLfixed"/>
-        <param name="zFar" type="GLfixed"/>
-    </function>
-
-    <function name="LightModelxOES" alias="LightModelx">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="LightModelxvOES" alias="LightModelxv">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-
-    <function name="LightxOES" alias="Lightx">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="LightxvOES" alias="Lightxv">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-
-    <function name="LineWidthxOES" alias="LineWidthx">
-        <param name="width" type="GLfixed"/>
-    </function>
-
-    <function name="LoadMatrixxOES" alias="LoadMatrixx">
-        <param name="m" type="const GLfixed *" count="16"/>
-    </function>
-
-    <function name="MaterialxOES" alias="Materialx">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="MaterialxvOES" alias="Materialxv">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-
-    <function name="MultiTexCoord4xOES" alias="MultiTexCoord4x">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLfixed"/>
-        <param name="t" type="GLfixed"/>
-        <param name="r" type="GLfixed"/>
-        <param name="q" type="GLfixed"/>
-    </function>
-
-    <function name="MultMatrixxOES" alias="MultMatrixx">
-        <param name="m" type="const GLfixed *" count="16"/>
-    </function>
-
-    <function name="Normal3xOES" alias="Normal3x">
-        <param name="nx" type="GLfixed"/>
-        <param name="ny" type="GLfixed"/>
-        <param name="nz" type="GLfixed"/>
-    </function>
-
-    <function name="OrthoxOES" alias="Orthox">
-        <param name="left" type="GLfixed"/>
-        <param name="right" type="GLfixed"/>
-        <param name="bottom" type="GLfixed"/>
-        <param name="top" type="GLfixed"/>
-        <param name="zNear" type="GLfixed"/>
-        <param name="zFar" type="GLfixed"/>
-    </function>
-
-    <function name="PointSizexOES" alias="PointSizex">
-        <param name="size" type="GLfixed"/>
-    </function>
-
-    <function name="PolygonOffsetxOES" alias="PolygonOffsetx">
-        <param name="factor" type="GLfixed"/>
-        <param name="units" type="GLfixed"/>
-    </function>
-
-    <function name="RotatexOES" alias="Rotatex">
-        <param name="angle" type="GLfixed"/>
-        <param name="x" type="GLfixed"/>
-        <param name="y" type="GLfixed"/>
-        <param name="z" type="GLfixed"/>
-    </function>
-
-    <function name="SampleCoveragexOES" alias="SampleCoveragex">
-        <param name="value" type="GLclampx"/>
-        <param name="invert" type="GLboolean"/>
-    </function>
-
-    <function name="ScalexOES" alias="Scalex">
-        <param name="x" type="GLfixed"/>
-        <param name="y" type="GLfixed"/>
-        <param name="z" type="GLfixed"/>
-    </function>
-
-    <function name="TexEnvxOES" alias="TexEnvx">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="TexEnvxvOES" alias="TexEnvxv">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-
-    <function name="TexParameterxOES" alias="TexParameterx">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="TranslatexOES" alias="Translatex">
-        <param name="x" type="GLfixed"/>
-        <param name="y" type="GLfixed"/>
-        <param name="z" type="GLfixed"/>
-    </function>
-
-    <!-- additon to es1.1 -->
-    <function name="ClipPlanexOES" alias="ClipPlanex">
-        <param name="plane" type="GLenum"/>
-        <param name="equation" type="const GLfixed *" count="4"/>
-    </function>
-
-    <function name="GetClipPlanexOES" alias="GetClipPlanex">
-        <param name="plane" type="GLenum"/>
-        <param name="equation" type="GLfixed *" output="true" count="4"/>
-    </function>
-
-    <function name="GetFixedvOES" alias="GetFixedv">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfixed *" output="true" variable_param="pname"/>
-    </function>
-
-    <function name="GetLightxvOES" alias="GetLightxv">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfixed *" output="true" variable_param="pname"/>
-    </function>
-
-    <function name="GetMaterialxvOES" alias="GetMaterialxv">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfixed *" output="true" variable_param="pname"/>
-    </function>
-
-    <function name="GetTexEnvxvOES" alias="GetTexEnvxv">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfixed *" output="true" variable_param="pname"/>
-    </function>
-
-    <function name="GetTexParameterxvOES" alias="GetTexParameterxv">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfixed *" output="true" variable_param="pname"/>
-    </function>
-
-    <function name="PointParameterxOES" alias="PointParameterx">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfixed"/>
-    </function>
-
-    <function name="PointParameterxvOES" alias="PointParameterxv">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *"/>
-    </function>
-
-    <function name="TexParameterxvOES" alias="TexParameterxv">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-</category>
-
-<!-- part of es1.1 extension pack -->
-<category name="GL_OES_framebuffer_object" number="10">
-    <enum name="NONE_OES"                                 value="0"/>
-    <enum name="INVALID_FRAMEBUFFER_OPERATION_OES"        value="0x0506"/>
-    <enum name="RGBA4_OES"                                value="0x8056"/>
-    <enum name="RGB5_A1_OES"                              value="0x8057"/>
-    <enum name="DEPTH_COMPONENT16_OES"                    value="0x81A5"/>
-
-    <enum name="MAX_RENDERBUFFER_SIZE_OES"                value="0x84E8"/>
-    <enum name="FRAMEBUFFER_BINDING_OES"                  value="0x8CA6"/>
-    <enum name="RENDERBUFFER_BINDING_OES"                 value="0x8CA7"/>
-    <enum name="FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE_OES"   value="0x8CD0"/>
-    <enum name="FRAMEBUFFER_ATTACHMENT_OBJECT_NAME_OES"   value="0x8CD1"/>
-    <enum name="FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL_OES" value="0x8CD2"/>
-    <enum name="FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE_OES" value="0x8CD3"/>
-    <enum name="FRAMEBUFFER_ATTACHMENT_TEXTURE_3D_ZOFFSET_OES" value="0x8CD4"/>
-    <enum name="FRAMEBUFFER_COMPLETE_OES"                 value="0x8CD5"/>
-    <enum name="FRAMEBUFFER_INCOMPLETE_ATTACHMENT_OES"    value="0x8CD6"/>
-    <enum name="FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_OES" value="0x8CD7"/>
-    <enum name="FRAMEBUFFER_INCOMPLETE_DIMENSIONS_OES"    value="0x8CD9"/>
-    <enum name="FRAMEBUFFER_INCOMPLETE_FORMATS_OES"       value="0x8CDA"/>
-    <enum name="FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER_OES"   value="0x8CDB"/>
-    <enum name="FRAMEBUFFER_INCOMPLETE_READ_BUFFER_OES"   value="0x8CDC"/>
-    <enum name="FRAMEBUFFER_UNSUPPORTED_OES"              value="0x8CDD"/>
-    <enum name="COLOR_ATTACHMENT0_OES"                    value="0x8CE0"/>
-    <enum name="DEPTH_ATTACHMENT_OES"                     value="0x8D00"/>
-    <enum name="STENCIL_ATTACHMENT_OES"                   value="0x8D20"/>
-    <enum name="FRAMEBUFFER_OES"                          value="0x8D40"/>
-    <enum name="RENDERBUFFER_OES"                         value="0x8D41"/>
-    <enum name="RENDERBUFFER_WIDTH_OES"                   value="0x8D42"/>
-    <enum name="RENDERBUFFER_HEIGHT_OES"                  value="0x8D43"/>
-    <enum name="RENDERBUFFER_INTERNAL_FORMAT_OES"         value="0x8D44"/>
-    <enum name="STENCIL_INDEX1_OES"                       value="0x8D46"/>
-    <enum name="STENCIL_INDEX4_OES"                       value="0x8D47"/>
-    <enum name="STENCIL_INDEX8_OES"                       value="0x8D48"/>
-    <enum name="RENDERBUFFER_RED_SIZE_OES"                value="0x8D50"/>
-    <enum name="RENDERBUFFER_GREEN_SIZE_OES"              value="0x8D51"/>
-    <enum name="RENDERBUFFER_BLUE_SIZE_OES"               value="0x8D52"/>
-    <enum name="RENDERBUFFER_ALPHA_SIZE_OES"              value="0x8D53"/>
-    <enum name="RENDERBUFFER_DEPTH_SIZE_OES"              value="0x8D54"/>
-    <enum name="RENDERBUFFER_STENCIL_SIZE_OES"            value="0x8D55"/>
-    <enum name="RGB565_OES"                               value="0x8D62"/>
-
-    <function name="BindFramebufferOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="framebuffer" type="GLuint"/>
-    </function>
-
-    <function name="BindRenderbufferOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="renderbuffer" type="GLuint"/>
-    </function>
-
-    <function name="CheckFramebufferStatusOES" offset="assign">
-        <param name="target" type="GLenum"/>
-	<return type="GLenum"/>
-    </function>
-
-    <function name="DeleteFramebuffersOES" offset="assign">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="framebuffers" type="const GLuint *" count="n"/>
-    </function>
-
-    <function name="DeleteRenderbuffersOES" offset="assign">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="renderbuffers" type="const GLuint *" count="n"/>
-    </function>
-
-    <function name="FramebufferRenderbufferOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="attachment" type="GLenum"/>
-        <param name="renderbuffertarget" type="GLenum"/>
-        <param name="renderbuffer" type="GLuint"/>
-    </function>
-
-    <function name="FramebufferTexture2DOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="attachment" type="GLenum"/>
-        <param name="textarget" type="GLenum"/>
-        <param name="texture" type="GLuint"/>
-        <param name="level" type="GLint"/>
-    </function>
-
-    <function name="GenerateMipmapOES" offset="assign">
-        <param name="target" type="GLenum"/>
-    </function>
-
-    <function name="GenFramebuffersOES" offset="assign">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="framebuffers" type="GLuint *" count="n" output="true"/>
-    </function>
-
-    <function name="GenRenderbuffersOES" offset="assign">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="renderbuffers" type="GLuint *" count="n" output="true"/>
-    </function>
-
-    <function name="GetFramebufferAttachmentParameterivOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="attachment" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true"/>
-    </function>
-
-    <function name="GetRenderbufferParameterivOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true"/>
-    </function>
-
-    <function name="IsFramebufferOES" offset="assign">
-        <param name="framebuffer" type="GLuint"/>
-	<return type="GLboolean"/>
-    </function>
-
-    <function name="IsRenderbufferOES" offset="assign">
-        <param name="renderbuffer" type="GLuint"/>
-	<return type="GLboolean"/>
-    </function>
-
-    <function name="RenderbufferStorageOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-    </function>
-</category>
-
-<!-- core addition to es1.1 -->
-<category name="GL_OES_matrix_get" number="11">
-    <enum name="MODELVIEW_MATRIX_FLOAT_AS_INT_BITS_OES"   value="0x898D"/>
-    <enum name="PROJECTION_MATRIX_FLOAT_AS_INT_BITS_OES"  value="0x898E"/>
-    <enum name="TEXTURE_MATRIX_FLOAT_AS_INT_BITS_OES"	  value="0x898F"/>
-</category>
-
-<!-- optional for es1.1 -->
-<category name="GL_OES_matrix_palette" number="12">
-    <enum name="MAX_VERTEX_UNITS_OES"                     value="0x86A4"/>
-    <enum name="WEIGHT_ARRAY_TYPE_OES"                    value="0x86A9"/>
-    <enum name="WEIGHT_ARRAY_STRIDE_OES"                  value="0x86AA"/>
-    <enum name="WEIGHT_ARRAY_SIZE_OES"                    value="0x86AB"/>
-    <enum name="WEIGHT_ARRAY_POINTER_OES"                 value="0x86AC"/>
-    <enum name="WEIGHT_ARRAY_OES"                         value="0x86AD"/>
-    <enum name="MATRIX_PALETTE_OES"                       value="0x8840"/>
-    <enum name="MAX_PALETTE_MATRICES_OES"                 value="0x8842"/>
-    <enum name="CURRENT_PALETTE_MATRIX_OES"               value="0x8843"/>
-    <enum name="MATRIX_INDEX_ARRAY_OES"                   value="0x8844"/>
-    <enum name="MATRIX_INDEX_ARRAY_SIZE_OES"              value="0x8846"/>
-    <enum name="MATRIX_INDEX_ARRAY_TYPE_OES"              value="0x8847"/>
-    <enum name="MATRIX_INDEX_ARRAY_STRIDE_OES"            value="0x8848"/>
-    <enum name="MATRIX_INDEX_ARRAY_POINTER_OES"           value="0x8849"/>
-    <enum name="WEIGHT_ARRAY_BUFFER_BINDING_OES"          value="0x889E"/>
-    <enum name="MATRIX_INDEX_ARRAY_BUFFER_BINDING_OES"    value="0x8B9E"/>
-
-    <function name="CurrentPaletteMatrixOES">
-        <param name="matrixpaletteindex" type="GLuint"/>
-    </function>
-
-    <function name="LoadPaletteFromModelViewMatrixOES">
-    </function>
-
-    <function name="MatrixIndexPointerOES">
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-    </function>
-
-    <function name="WeightPointerOES">
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-    </function>
-</category>
-
-<!-- required for es1.1 -->
-<category name="GL_OES_point_size_array" number="14">
-    <enum name="POINT_SIZE_ARRAY_TYPE_OES"                value="0x898A"/>
-    <enum name="POINT_SIZE_ARRAY_STRIDE_OES"	          value="0x898B"/>
-    <enum name="POINT_SIZE_ARRAY_POINTER_OES"	          value="0x898C"/>
-    <enum name="POINT_SIZE_ARRAY_OES"                     value="0x8B9C"/>
-    <enum name="POINT_SIZE_ARRAY_BUFFER_BINDING_OES"	  value="0x8B9F"/>
-
-    <function name="PointSizePointerOES" offset="assign">
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-    </function>
-</category>
-
-<!-- required for es1.1 -->
-<category name="GL_OES_point_sprite" number="15">
-    <enum name="POINT_SPRITE_OES"                         value="0x8861"/>
-    <enum name="COORD_REPLACE_OES"	                  value="0x8862"/>
-</category>
-
-<!-- optional for es1.0 -->
-<category name="GL_OES_query_matrix" number="16">
-    <function name="QueryMatrixxOES" offset="assign">
-        <param name="mantissa" type="GLfixed *" count="16" />
-        <param name="exponent" type="GLint *" count="16" />
-	<return type="GLbitfield"/>
-    </function>
-</category>
-
-<!-- required for es1.0 and later -->
-<category name="GL_OES_read_format" number="17">
-    <enum name="IMPLEMENTATION_COLOR_READ_TYPE_OES"       value="0x8B9A"/>
-    <enum name="IMPLEMENTATION_COLOR_READ_FORMAT_OES"     value="0x8B9B"/>
-</category>
-
-<!-- core addition to es1.0 and later -->
-<category name="GL_OES_single_precision" number="18">
-    <!-- additon to es1.0 -->
-    <function name="ClearDepthfOES" alias="ClearDepthf">
-        <param name="depth" type="GLclampf"/>
-    </function>
-
-    <function name="DepthRangefOES" alias="DepthRangef">
-        <param name="zNear" type="GLclampf"/>
-        <param name="zFar" type="GLclampf"/>
-    </function>
-
-    <function name="FrustumfOES" alias="Frustumf">
-        <param name="left" type="GLfloat"/>
-        <param name="right" type="GLfloat"/>
-        <param name="bottom" type="GLfloat"/>
-        <param name="top" type="GLfloat"/>
-        <param name="zNear" type="GLfloat"/>
-        <param name="zFar" type="GLfloat"/>
-    </function>
-
-    <function name="OrthofOES" alias="Orthof">
-        <param name="left" type="GLfloat"/>
-        <param name="right" type="GLfloat"/>
-        <param name="bottom" type="GLfloat"/>
-        <param name="top" type="GLfloat"/>
-        <param name="zNear" type="GLfloat"/>
-        <param name="zFar" type="GLfloat"/>
-    </function>
-
-    <!-- additon to es1.1 -->
-    <function name="ClipPlanefOES" alias="ClipPlanef">
-        <param name="plane" type="GLenum"/>
-        <param name="equation" type="const GLfloat *" count="4"/>
-    </function>
-
-    <function name="GetClipPlanefOES" alias="GetClipPlanef">
-        <param name="plane" type="GLenum"/>
-        <param name="equation" type="GLfloat *" output="true" count="4"/>
-    </function>
-</category>
-
-<!-- part of es1.1 extension pack -->
-<category name="GL_OES_texture_cube_map" number="20">
-    <enum name="TEXTURE_GEN_MODE_OES"                  value="0x2500"/>
-    <enum name="NORMAL_MAP_OES"                        value="0x8511"/>
-    <enum name="REFLECTION_MAP_OES"                    value="0x8512"/>
-    <enum name="TEXTURE_CUBE_MAP_OES"                  value="0x8513"/>
-    <enum name="TEXTURE_BINDING_CUBE_MAP_OES"          value="0x8514"/>
-    <enum name="TEXTURE_CUBE_MAP_POSITIVE_X_OES"       value="0x8515"/>
-    <enum name="TEXTURE_CUBE_MAP_NEGATIVE_X_OES"       value="0x8516"/>
-    <enum name="TEXTURE_CUBE_MAP_POSITIVE_Y_OES"       value="0x8517"/>
-    <enum name="TEXTURE_CUBE_MAP_NEGATIVE_Y_OES"       value="0x8518"/>
-    <enum name="TEXTURE_CUBE_MAP_POSITIVE_Z_OES"       value="0x8519"/>
-    <enum name="TEXTURE_CUBE_MAP_NEGATIVE_Z_OES"       value="0x851A"/>
-    <enum name="MAX_CUBE_MAP_TEXTURE_SIZE_OES"         value="0x851C"/>
-    <enum name="TEXTURE_GEN_STR_OES"                   value="0x8D60"/>
-
-    <function name="GetTexGenfvOES" offset="279">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="133"/>
-    </function>
-
-    <function name="GetTexGenivOES" offset="280">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="134"/>
-    </function>
-
-    <function name="GetTexGenxvOES" offset="assign">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfixed *" output="true" variable_param="pname"/>
-    </function>
-
-    <function name="TexGenfOES" offset="190">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="117"/>
-    </function>
-
-    <function name="TexGenfvOES" offset="191">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="118"/>
-    </function>
-
-    <function name="TexGeniOES" offset="192">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="119"/>
-    </function>
-
-    <function name="TexGenivOES" offset="193">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="120"/>
-    </function>
-
-    <function name="TexGenxOES" offset="assign">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-    </function>
-
-    <function name="TexGenxvOES" offset="assign">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfixed *" variable_param="pname"/>
-    </function>
-</category>
-
-<category name="GL_OES_texture_env_crossbar" number="21">
-    <!-- No new functions, types, enums. -->
-</category>
-
-<category name="GL_OES_texture_mirrored_repeat" number="22">
-    <!-- No new functions, types, enums. -->
-</category>
-
-<category name="GL_EXT_texture_lod_bias" number="60">
-    <enum name="TEXTURE_FILTER_CONTROL_EXT"               value="0x8500"/>
-    <enum name="TEXTURE_LOD_BIAS_EXT"                     value="0x8501"/>
-    <enum name="MAX_TEXTURE_LOD_BIAS_EXT"                 value="0x84FD"/>
-</category>
-
-</OpenGLAPI>
diff --git a/src/mapi/glapi/gen-es/es2_API.xml b/src/mapi/glapi/gen-es/es2_API.xml
deleted file mode 100644
index f8af63b94fe..00000000000
--- a/src/mapi/glapi/gen-es/es2_API.xml
+++ /dev/null
@@ -1,294 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd">
-
-<!-- OpenGL ES 2.x API -->
-
-<OpenGLAPI>
-
-<xi:include href="base2_API.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-
-<!-- core subset of OpenGL 2.0 defined in OpenGL ES 2.0 -->
-<category name="core2.0">
-    <!-- addition to base1.0 -->
-    <enum name="NONE"                                     value="0x0"/>
-    <enum name="INT"                           count="4"  value="0x1404">
-        <size name="CallLists"/>
-    </enum>
-    <enum name="UNSIGNED_INT"                  count="4"  value="0x1405">
-        <size name="CallLists"/>
-    </enum>
-    <enum name="STENCIL_INDEX"                            value="0x1901"/>
-    <enum name="DEPTH_COMPONENT"                          value="0x1902"/>
-
-    <function name="TexImage2D" offset="183">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLint"/> <!-- XXX the actual type is GLenum... -->
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_format="format" img_type="type" img_target="target" img_send_null="true" img_pad_dimensions="true"/>
-        <glx rop="110" large="true"/>
-    </function>
-
-    <!-- addition to base1.1 -->
-    <enum name="RGBA4"                                    value="0x8056"/>
-    <enum name="RGB5_A1"                                  value="0x8057"/>
-
-    <!-- addition to base1.2 -->
-    <enum name="CONSTANT_COLOR"                           value="0x8001"/>
-    <enum name="ONE_MINUS_CONSTANT_COLOR"                 value="0x8002"/>
-    <enum name="CONSTANT_ALPHA"                           value="0x8003"/>
-    <enum name="ONE_MINUS_CONSTANT_ALPHA"                 value="0x8004"/>
-    <enum name="BLEND_COLOR"                   count="4"  value="0x8005">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FUNC_ADD"                                 value="0x8006"/>
-    <enum name="BLEND_EQUATION"                count="1"  value="0x8009">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="FUNC_SUBTRACT"                            value="0x800A"/>
-    <enum name="FUNC_REVERSE_SUBTRACT"                    value="0x800B"/>
-
-    <function name="BlendColor" offset="336">
-        <param name="red" type="GLclampf"/>
-        <param name="green" type="GLclampf"/>
-        <param name="blue" type="GLclampf"/>
-        <param name="alpha" type="GLclampf"/>
-        <glx rop="4096"/>
-    </function>
-
-    <function name="BlendEquation" offset="337">
-        <param name="mode" type="GLenum"/>
-        <glx rop="4097"/>
-    </function>
-
-    <!-- addition to base1.3 -->
-    <enum name="TEXTURE_CUBE_MAP"              count="1"  value="0x8513">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_BINDING_CUBE_MAP"      count="1"  value="0x8514">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="TEXTURE_CUBE_MAP_POSITIVE_X"              value="0x8515"/>
-    <enum name="TEXTURE_CUBE_MAP_NEGATIVE_X"              value="0x8516"/>
-    <enum name="TEXTURE_CUBE_MAP_POSITIVE_Y"              value="0x8517"/>
-    <enum name="TEXTURE_CUBE_MAP_NEGATIVE_Y"              value="0x8518"/>
-    <enum name="TEXTURE_CUBE_MAP_POSITIVE_Z"              value="0x8519"/>
-    <enum name="TEXTURE_CUBE_MAP_NEGATIVE_Z"              value="0x851A"/>
-    <enum name="MAX_CUBE_MAP_TEXTURE_SIZE"     count="1"  value="0x851C">
-        <size name="Get" mode="get"/>
-    </enum>
-
-    <!-- addition to base1.4 -->
-    <enum name="BLEND_DST_RGB"                 count="1"  value="0x80C8">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLEND_SRC_RGB"                 count="1"  value="0x80C9">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLEND_DST_ALPHA"               count="1"  value="0x80CA">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="BLEND_SRC_ALPHA"               count="1"  value="0x80CB">
-        <size name="Get" mode="get"/>
-    </enum>
-    <enum name="DEPTH_COMPONENT16"                        value="0x81A5"/>
-    <enum name="MIRRORED_REPEAT"                          value="0x8370"/>
-    <enum name="INCR_WRAP"                                value="0x8507"/>
-    <enum name="DECR_WRAP"                                value="0x8508"/>
-
-    <function name="BlendFuncSeparate" offset="assign">
-        <param name="sfactorRGB" type="GLenum"/>
-        <param name="dfactorRGB" type="GLenum"/>
-        <param name="sfactorAlpha" type="GLenum"/>
-        <param name="dfactorAlpha" type="GLenum"/>
-        <glx rop="4134"/>
-    </function>
-
-    <!-- addition to base1.5 -->
-    <enum name="VERTEX_ATTRIB_ARRAY_BUFFER_BINDING" count="1" value="0x889F">
-        <size name="GetVertexAttribdv" mode="get"/>
-        <size name="GetVertexAttribfv" mode="get"/>
-        <size name="GetVertexAttribiv" mode="get"/>
-    </enum>
-    <enum name="STREAM_DRAW"                              value="0x88E0"/>
-
-    <!-- addition to base2.0 -->
-    <!-- base2.0 should have everything defined -->
-</category>
-
-<!-- OpenGL ES 2.0 -->
-<category name="es2.0">
-    <!-- addition to core2.0 -->
-    <enum name="LOW_FLOAT"                                    value="0x8DF0"/>
-    <enum name="MEDIUM_FLOAT"                                 value="0x8DF1"/>
-    <enum name="HIGH_FLOAT"                                   value="0x8DF2"/>
-    <enum name="LOW_INT"                                      value="0x8DF3"/>
-    <enum name="MEDIUM_INT"                                   value="0x8DF4"/>
-    <enum name="HIGH_INT"                                     value="0x8DF5"/>
-    <enum name="SHADER_BINARY_FORMATS"                        value="0x8DF8"/>
-    <enum name="NUM_SHADER_BINARY_FORMATS"                    value="0x8DF9"/>
-    <enum name="SHADER_COMPILER"                              value="0x8DFA"/>
-    <enum name="MAX_VERTEX_UNIFORM_VECTORS"                   value="0x8DFB"/>
-    <enum name="MAX_VARYING_VECTORS"                          value="0x8DFC"/>
-    <enum name="MAX_FRAGMENT_UNIFORM_VECTORS"                 value="0x8DFD"/>
-
-    <function name="GetShaderPrecisionFormat" offset="assign">
-        <param name="shadertype" type="GLenum"/>
-        <param name="precisiontype" type="GLenum"/>
-        <param name="range" type="GLint *"/>
-        <param name="precision" type="GLint *"/>
-    </function>
-
-    <function name="ReleaseShaderCompiler" offset="assign">
-    </function>
-
-    <function name="ShaderBinary" offset="assign">
-        <param name="n" type="GLsizei"/>
-        <param name="shaders" type="const GLuint *"/>
-        <param name="binaryformat" type="GLenum"/>
-        <param name="binary" type="const GLvoid *"/>
-        <param name="length" type="GLsizei"/>
-    </function>
-
-    <!-- from GL_OES_fixed_point -->
-    <enum name="FIXED"                                    value="0x140C"/>
-    <type name="fixed"   size="4"                                    />
-
-    <!-- from GL_OES_framebuffer_object -->
-    <enum name="INVALID_FRAMEBUFFER_OPERATION"                value="0x0506"/>
-    <enum name="MAX_RENDERBUFFER_SIZE"                        value="0x84E8"/>
-    <enum name="FRAMEBUFFER_BINDING"                          value="0x8CA6"/>
-    <enum name="RENDERBUFFER_BINDING"                         value="0x8CA7"/>
-    <enum name="FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE"           value="0x8CD0"/>
-    <enum name="FRAMEBUFFER_ATTACHMENT_OBJECT_NAME"           value="0x8CD1"/>
-    <enum name="FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL"         value="0x8CD2"/>
-    <enum name="FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE" value="0x8CD3"/>
-    <enum name="FRAMEBUFFER_COMPLETE"                         value="0x8CD5"/>
-    <enum name="FRAMEBUFFER_INCOMPLETE_ATTACHMENT"            value="0x8CD6"/>
-    <enum name="FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT"    value="0x8CD7"/>
-    <enum name="FRAMEBUFFER_INCOMPLETE_DIMENSIONS"            value="0x8CD9"/>
-    <enum name="FRAMEBUFFER_UNSUPPORTED"                      value="0x8CDD"/>
-    <enum name="COLOR_ATTACHMENT0"                            value="0x8CE0"/>
-    <enum name="DEPTH_ATTACHMENT"                             value="0x8D00"/>
-    <enum name="STENCIL_ATTACHMENT"                           value="0x8D20"/>
-    <enum name="FRAMEBUFFER"                                  value="0x8D40"/>
-    <enum name="RENDERBUFFER"                                 value="0x8D41"/>
-    <enum name="RENDERBUFFER_WIDTH"                           value="0x8D42"/>
-    <enum name="RENDERBUFFER_HEIGHT"                          value="0x8D43"/>
-    <enum name="RENDERBUFFER_INTERNAL_FORMAT"                 value="0x8D44"/>
-    <enum name="STENCIL_INDEX8"                               value="0x8D48"/>
-    <enum name="RENDERBUFFER_RED_SIZE"                        value="0x8D50"/>
-    <enum name="RENDERBUFFER_GREEN_SIZE"                      value="0x8D51"/>
-    <enum name="RENDERBUFFER_BLUE_SIZE"                       value="0x8D52"/>
-    <enum name="RENDERBUFFER_ALPHA_SIZE"                      value="0x8D53"/>
-    <enum name="RENDERBUFFER_DEPTH_SIZE"                      value="0x8D54"/>
-    <enum name="RENDERBUFFER_STENCIL_SIZE"                    value="0x8D55"/>
-    <enum name="RGB565"                                       value="0x8D62"/>
-
-    <function name="BindFramebuffer" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="framebuffer" type="GLuint"/>
-    </function>
-
-    <function name="BindRenderbuffer" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="renderbuffer" type="GLuint"/>
-    </function>
-
-    <function name="CheckFramebufferStatus" offset="assign">
-        <param name="target" type="GLenum"/>
-	<return type="GLenum"/>
-    </function>
-
-    <function name="DeleteFramebuffers" offset="assign">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="framebuffers" type="const GLuint *" count="n"/>
-    </function>
-
-    <function name="DeleteRenderbuffers" offset="assign">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="renderbuffers" type="const GLuint *" count="n"/>
-    </function>
-
-    <function name="FramebufferRenderbuffer" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="attachment" type="GLenum"/>
-        <param name="renderbuffertarget" type="GLenum"/>
-        <param name="renderbuffer" type="GLuint"/>
-    </function>
-
-    <function name="FramebufferTexture2D" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="attachment" type="GLenum"/>
-        <param name="textarget" type="GLenum"/>
-        <param name="texture" type="GLuint"/>
-        <param name="level" type="GLint"/>
-    </function>
-
-    <function name="GenerateMipmap" offset="assign">
-        <param name="target" type="GLenum"/>
-    </function>
-
-    <function name="GenFramebuffers" offset="assign">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="framebuffers" type="GLuint *" count="n" output="true"/>
-    </function>
-
-    <function name="GenRenderbuffers" offset="assign">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="renderbuffers" type="GLuint *" count="n" output="true"/>
-    </function>
-
-    <function name="GetFramebufferAttachmentParameteriv" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="attachment" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true"/>
-    </function>
-
-    <function name="GetRenderbufferParameteriv" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true"/>
-    </function>
-
-    <function name="IsFramebuffer" offset="assign">
-        <param name="framebuffer" type="GLuint"/>
-	<return type="GLboolean"/>
-    </function>
-
-    <function name="IsRenderbuffer" offset="assign">
-        <param name="renderbuffer" type="GLuint"/>
-	<return type="GLboolean"/>
-    </function>
-
-    <function name="RenderbufferStorage" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-    </function>
-
-    <!-- from GL_OES_read_format -->
-    <enum name="IMPLEMENTATION_COLOR_READ_TYPE"           value="0x8B9A"/>
-    <enum name="IMPLEMENTATION_COLOR_READ_FORMAT"         value="0x8B9B"/>
-
-    <!-- from GL_OES_single_precision -->
-    <function name="ClearDepthf" offset="assign">
-        <param name="depth" type="GLclampf"/>
-    </function>
-
-    <function name="DepthRangef" offset="assign">
-        <param name="zNear" type="GLclampf"/>
-        <param name="zFar" type="GLclampf"/>
-    </function>
-</category>
-
-<xi:include href="es2_EXT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-<xi:include href="es2_COMPAT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-
-</OpenGLAPI>
diff --git a/src/mapi/glapi/gen-es/es2_COMPAT.xml b/src/mapi/glapi/gen-es/es2_COMPAT.xml
deleted file mode 100644
index 1bd3569635b..00000000000
--- a/src/mapi/glapi/gen-es/es2_COMPAT.xml
+++ /dev/null
@@ -1,368 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd">
-
-<OpenGLAPI>
-
-<!-- This file defines the functions that are needed by Mesa.  It
-     makes sure the generated glapi headers are compatible with Mesa.
-     It mainly consists of missing functions and aliases in OpenGL ES.
--->
-
-<xi:include href="es_COMPAT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-
-<!-- except for those defined by es_COMPAT.xml, these are also needed -->
-<category name="compat">
-    <!-- OpenGL 1.0 -->
-    <function name="Color4f" offset="29" vectorequiv="Color4fv" static_dispatch="false">
-        <param name="red" type="GLfloat"/>
-        <param name="green" type="GLfloat"/>
-        <param name="blue" type="GLfloat"/>
-        <param name="alpha" type="GLfloat"/>
-    </function>
-
-    <function name="Color4ub" offset="35" vectorequiv="Color4ubv" static_dispatch="false">
-        <param name="red" type="GLubyte"/>
-        <param name="green" type="GLubyte"/>
-        <param name="blue" type="GLubyte"/>
-        <param name="alpha" type="GLubyte"/>
-    </function>
-
-    <function name="Normal3f" offset="56" vectorequiv="Normal3fv" static_dispatch="false">
-        <param name="nx" type="GLfloat"/>
-        <param name="ny" type="GLfloat"/>
-        <param name="nz" type="GLfloat"/>
-    </function>
-
-    <function name="Fogf" offset="153" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="80"/>
-    </function>
-
-    <function name="Fogfv" offset="154" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="81"/>
-    </function>
-
-    <function name="Lightf" offset="159" static_dispatch="false">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="86"/>
-    </function>
-
-    <function name="Lightfv" offset="160" static_dispatch="false">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="87"/>
-    </function>
-
-    <function name="LightModelf" offset="163" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="90"/>
-    </function>
-
-    <function name="LightModelfv" offset="164" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="91"/>
-    </function>
-
-    <function name="Materialf" offset="169" static_dispatch="false">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="96"/>
-    </function>
-
-    <function name="Materialfv" offset="170" static_dispatch="false">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="97"/>
-    </function>
-
-    <function name="PointSize" offset="173" static_dispatch="false">
-        <param name="size" type="GLfloat"/>
-        <glx rop="100"/>
-    </function>
-
-    <function name="ShadeModel" offset="177" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <glx rop="104"/>
-    </function>
-
-    <function name="TexEnvf" offset="184" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="111"/>
-    </function>
-
-    <function name="TexEnvfv" offset="185" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="112"/>
-    </function>
-
-    <function name="TexEnvi" offset="186" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="113"/>
-    </function>
-
-    <function name="TexEnviv" offset="187" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="114"/>
-    </function>
-
-    <function name="TexGenf" offset="190" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="117"/>
-    </function>
-
-    <function name="TexGenfv" offset="191" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="118"/>
-    </function>
-
-    <function name="TexGeni" offset="192" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="119"/>
-    </function>
-
-    <function name="TexGeniv" offset="193" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="120"/>
-    </function>
-
-    <function name="AlphaFunc" offset="240" static_dispatch="false">
-        <param name="func" type="GLenum"/>
-        <param name="ref" type="GLclampf"/>
-        <glx rop="159"/>
-    </function>
-
-    <function name="LogicOp" offset="242" static_dispatch="false">
-        <param name="opcode" type="GLenum"/>
-        <glx rop="161"/>
-    </function>
-
-    <function name="GetLightfv" offset="264" static_dispatch="false">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="118"/>
-    </function>
-
-    <function name="GetMaterialfv" offset="269" static_dispatch="false">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="123"/>
-    </function>
-
-    <function name="GetTexEnvfv" offset="276" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="130"/>
-    </function>
-
-    <function name="GetTexEnviv" offset="277" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="131"/>
-    </function>
-
-    <function name="GetTexGenfv" offset="279" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="133"/>
-    </function>
-
-    <function name="GetTexGeniv" offset="280" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="134"/>
-    </function>
-
-    <function name="LoadIdentity" offset="290" static_dispatch="false">
-        <glx rop="176"/>
-    </function>
-
-    <function name="LoadMatrixf" offset="291" static_dispatch="false">
-        <param name="m" type="const GLfloat *" count="16"/>
-        <glx rop="177"/>
-    </function>
-
-    <function name="MatrixMode" offset="293" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <glx rop="179"/>
-    </function>
-
-    <function name="MultMatrixf" offset="294" static_dispatch="false">
-        <param name="m" type="const GLfloat *" count="16"/>
-        <glx rop="180"/>
-    </function>
-
-    <function name="PopMatrix" offset="297" static_dispatch="false">
-        <glx rop="183"/>
-    </function>
-
-    <function name="PushMatrix" offset="298" static_dispatch="false">
-        <glx rop="184"/>
-    </function>
-
-    <function name="Rotatef" offset="300" static_dispatch="false">
-        <param name="angle" type="GLfloat"/>
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <glx rop="186"/>
-    </function>
-
-    <function name="Scalef" offset="302" static_dispatch="false">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <glx rop="188"/>
-    </function>
-
-    <function name="Translatef" offset="304" static_dispatch="false">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <glx rop="190"/>
-    </function>
-
-    <!-- OpenGL 1.1 -->
-    <function name="ColorPointer" offset="308" static_dispatch="false">
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="DisableClientState" offset="309" static_dispatch="false">
-        <param name="array" type="GLenum"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="EnableClientState" offset="313" static_dispatch="false">
-        <param name="array" type="GLenum"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="NormalPointer" offset="318" static_dispatch="false">
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="TexCoordPointer" offset="320" static_dispatch="false">
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="VertexPointer" offset="321" static_dispatch="false">
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="GetPointerv" offset="329" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLvoid **" output="true"/>
-        <glx handcode="true"/>
-    </function>
-
-    <!-- OpenGL 1.2 -->
-    <function name="TexImage3D" alias="TexImage3DOES" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="depth" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_format="format" img_type="type" img_target="target" img_null_flag="true" img_pad_dimensions="true"/>
-        <glx rop="4114" large="true"/>
-    </function>
-
-    <function name="TexSubImage3D" alias="TexSubImage3DOES" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="zoffset" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="depth" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="UNUSED" type="GLuint" padding="true"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_xoff="xoffset" img_yoff="yoffset" img_zoff="zoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/>
-        <glx rop="4115" large="true"/>
-    </function>
-
-    <function name="CopyTexSubImage3D" alias="CopyTexSubImage3DOES" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="zoffset" type="GLint"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <glx rop="4123"/>
-    </function>
-
-    <!-- GL_ARB_multitexture -->
-    <function name="ActiveTextureARB" alias="ActiveTexture" static_dispatch="false">
-        <param name="texture" type="GLenum"/>
-        <glx rop="197"/>
-    </function>
-
-    <function name="ClientActiveTextureARB" offset="375" static_dispatch="false">
-        <param name="texture" type="GLenum"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="MultiTexCoord4fARB" offset="402" vectorequiv="MultiTexCoord4fvARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLfloat"/>
-        <param name="t" type="GLfloat"/>
-        <param name="r" type="GLfloat"/>
-        <param name="q" type="GLfloat"/>
-    </function>
-</category>
-
-</OpenGLAPI>
diff --git a/src/mapi/glapi/gen-es/es2_EXT.xml b/src/mapi/glapi/gen-es/es2_EXT.xml
deleted file mode 100644
index 4a67952e5c3..00000000000
--- a/src/mapi/glapi/gen-es/es2_EXT.xml
+++ /dev/null
@@ -1,162 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd">
-
-<!-- OpenGL ES 2.x extensions -->
-
-<OpenGLAPI>
-
-<xi:include href="es_EXT.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-
-<category name="GL_OES_texture_3D" number="34">
-    <enum name="TEXTURE_BINDING_3D_OES"                   value="0x806A"/>
-    <enum name="TEXTURE_3D_OES"                           value="0x806F"/>
-    <enum name="TEXTURE_WRAP_R_OES"                       value="0x8072"/>
-    <enum name="MAX_3D_TEXTURE_SIZE_OES"                  value="0x8073"/>
-    <enum name="SAMPLER_3D_OES"                           value="0x8B5F"/>
-    <enum name="FRAMEBUFFER_ATTACHMENT_TEXTURE_3D_ZOFFSET_OES" value="0x8CD4"/>
-
-    <function name="CompressedTexImage3DOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="depth" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <param name="imageSize" type="GLsizei" counter="true"/>
-        <param name="data" type="const GLvoid *" count="imageSize"/>
-        <glx rop="216" handcode="client"/>
-    </function>
-
-    <function name="CompressedTexSubImage3DOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="zoffset" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="depth" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="imageSize" type="GLsizei" counter="true"/>
-        <param name="data" type="const GLvoid *" count="imageSize"/>
-        <glx rop="219" handcode="client"/>
-    </function>
-
-    <function name="CopyTexSubImage3DOES" offset="373">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="zoffset" type="GLint"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <glx rop="4123"/>
-    </function>
-
-    <function name="FramebufferTexture3DOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="attachment" type="GLenum"/>
-        <param name="textarget" type="GLenum"/>
-        <param name="texture" type="GLuint"/>
-        <param name="level" type="GLint"/>
-        <param name="zoffset" type="GLint"/>
-        <glx rop="4323"/>
-    </function>
-
-    <function name="TexImage3DOES" offset="371">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="depth" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_format="format" img_type="type" img_target="target" img_null_flag="true" img_pad_dimensions="true"/>
-        <glx rop="4114" large="true"/>
-    </function>
-
-    <function name="TexSubImage3DOES" offset="372">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="zoffset" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="depth" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="UNUSED" type="GLuint" padding="true"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_xoff="xoffset" img_yoff="yoffset" img_zoff="zoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/>
-        <glx rop="4115" large="true"/>
-    </function>
-</category>
-
-<!-- the other name is OES_texture_float_linear -->
-<category name="OES_texture_half_float_linear" number="35">
-    <!-- No new functions, types, enums. -->
-</category>
-
-<!-- the other name is OES_texture_float -->
-<category name="OES_texture_half_float" number="36">
-    <enum name="HALF_FLOAT_OES"                           value="0x8D61"/>
-</category>
-
-<category name="GL_OES_texture_npot" number="37">
-    <!-- No new functions, types, enums. -->
-</category>
-
-<category name="GL_OES_vertex_half_float" number="38">
-    <enum name="HALF_FLOAT_OES"                           value="0x8D61"/>
-</category>
-
-<category name="GL_EXT_texture_type_2_10_10_10_REV" number="42">
-    <enum name="UNSIGNED_INT_2_10_10_10_REV_EXT"          value="0x8368"/>
-</category>
-
-<category name="GL_OES_packed_depth_stencil" number="43">
-    <enum name="DEPTH_STENCIL_OES"                        value="0x84F9"/>
-    <enum name="UNSIGNED_INT_24_8_OES"                    value="0x84FA"/>
-    <enum name="DEPTH24_STENCIL8_OES"                     value="0x88F0"/>
-</category>
-
-<category name="GL_OES_depth_texture" number="44">
-    <!-- No new functions, types, enums. -->
-</category>
-
-<category name="GL_OES_standard_derivatives" number="45">
-    <enum name="FRAGMENT_SHADER_DERIVATIVE_HINT_OES"      value="0x8B8B"/>
-</category>
-
-<category name="GL_OES_vertex_type_10_10_10_2" number="46">
-    <enum name="UNSIGNED_INT_10_10_10_2_OES"              value="0x8DF6"/>
-    <enum name="INT_10_10_10_2_OES"                       value="0x8DF7"/>
-</category>
-
-<category name="GL_OES_get_program_binary" number="47">
-    <enum name="PROGRAM_BINARY_LENGTH_OES"                value="0x8741"/>
-    <enum name="NUM_PROGRAM_BINARY_FORMATS_OES"           value="0x87FE"/>
-    <enum name="PROGRAM_BINARY_FORMATS_OES"               value="0x87FF"/>
-
-    <function name="GetProgramBinaryOES" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="bufSize" type="GLsizei"/>
-        <param name="length" type="GLsizei *"/>
-        <param name="binaryFormat" type="GLenum *"/>
-        <param name="binary" type="GLvoid *"/>
-    </function>
-
-    <function name="ProgramBinaryOES" offset="assign">
-        <param name="program" type="GLuint"/>
-        <param name="binaryFormat" type="GLenum"/>
-        <param name="binary" type="const GLvoid *"/>
-        <param name="length" type="GLint"/>
-    </function>
-</category>
-
-</OpenGLAPI>
diff --git a/src/mapi/glapi/gen-es/es_COMPAT.xml b/src/mapi/glapi/gen-es/es_COMPAT.xml
deleted file mode 100644
index 7c729261105..00000000000
--- a/src/mapi/glapi/gen-es/es_COMPAT.xml
+++ /dev/null
@@ -1,2646 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd">
-
-<OpenGLAPI>
-
-<!-- This file defines the following categories
-
-         a subset of 1.0
-         a subset of 1.1
-         a subset of 1.2
-         a subset of GL_ARB_multitexture
-         GL_APPLE_vertex_array_object
-
-     to make sure the generated glapi headers are compatible with Mesa.
-     It is included by es1_COMPAT.xml and es2_COMPAT.xml.
--->
-
-<category name="1.0">
-    <type name="double"  size="8"  float="true"    glx_name="FLOAT64"/>
-    <type name="clampd"  size="8"  float="true"    glx_name="FLOAT64"/>
-
-    <type name="float"   size="4"  float="true"    glx_name="FLOAT32"/>
-    <type name="clampf"  size="4"  float="true"    glx_name="FLOAT32"/>
-
-    <type name="int"     size="4"                  glx_name="CARD32"/>
-    <type name="uint"    size="4"  unsigned="true" glx_name="CARD32"/>
-    <type name="sizei"   size="4"  unsigned="true" glx_name="CARD32"/>
-    <type name="enum"    size="4"  unsigned="true" glx_name="ENUM"/>
-    <type name="bitfield" size="4" unsigned="true" glx_name="CARD32"/>
-
-    <type name="short"   size="2"                  glx_name="CARD16"/>
-    <type name="ushort"  size="2"  unsigned="true" glx_name="CARD16"/>
-
-    <type name="byte"    size="1"                  glx_name="CARD8"/>
-    <type name="ubyte"   size="1"  unsigned="true" glx_name="CARD8"/>
-    <type name="boolean" size="1"  unsigned="true" glx_name="CARD8"/>
-
-    <type name="void"    size="1"/>
-
-    <function name="NewList" offset="0" static_dispatch="false">
-        <param name="list" type="GLuint"/>
-        <param name="mode" type="GLenum"/>
-        <glx sop="101"/>
-    </function>
-
-    <function name="EndList" offset="1" static_dispatch="false">
-        <glx sop="102"/>
-    </function>
-
-    <function name="CallList" offset="2" static_dispatch="false">
-        <param name="list" type="GLuint"/>
-        <glx rop="1"/>
-    </function>
-
-    <function name="CallLists" offset="3" static_dispatch="false">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="type" type="GLenum"/>
-        <param name="lists" type="const GLvoid *" variable_param="type" count="n"/>
-        <glx rop="2" large="true"/>
-    </function>
-
-    <function name="DeleteLists" offset="4" static_dispatch="false">
-        <param name="list" type="GLuint"/>
-        <param name="range" type="GLsizei"/>
-        <glx sop="103"/>
-    </function>
-
-    <function name="GenLists" offset="5" static_dispatch="false">
-        <param name="range" type="GLsizei"/>
-        <return type="GLuint"/>
-        <glx sop="104"/>
-    </function>
-
-    <function name="ListBase" offset="6" static_dispatch="false">
-        <param name="base" type="GLuint"/>
-        <glx rop="3"/>
-    </function>
-
-    <function name="Begin" offset="7" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <glx rop="4"/>
-    </function>
-
-    <function name="Bitmap" offset="8" static_dispatch="false">
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="xorig" type="GLfloat"/>
-        <param name="yorig" type="GLfloat"/>
-        <param name="xmove" type="GLfloat"/>
-        <param name="ymove" type="GLfloat"/>
-        <param name="bitmap" type="const GLubyte *" img_width="width" img_height="height" img_format="GL_COLOR_INDEX" img_type="GL_BITMAP" img_target="0" img_pad_dimensions="false"/>
-        <glx rop="5" large="true"/>
-    </function>
-
-    <function name="Color3b" offset="9" vectorequiv="Color3bv" static_dispatch="false">
-        <param name="red" type="GLbyte"/>
-        <param name="green" type="GLbyte"/>
-        <param name="blue" type="GLbyte"/>
-    </function>
-
-    <function name="Color3bv" offset="10" static_dispatch="false">
-        <param name="v" type="const GLbyte *" count="3"/>
-        <glx rop="6"/>
-    </function>
-
-    <function name="Color3d" offset="11" vectorequiv="Color3dv" static_dispatch="false">
-        <param name="red" type="GLdouble"/>
-        <param name="green" type="GLdouble"/>
-        <param name="blue" type="GLdouble"/>
-    </function>
-
-    <function name="Color3dv" offset="12" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="3"/>
-        <glx rop="7"/>
-    </function>
-
-    <function name="Color3f" offset="13" vectorequiv="Color3fv" static_dispatch="false">
-        <param name="red" type="GLfloat"/>
-        <param name="green" type="GLfloat"/>
-        <param name="blue" type="GLfloat"/>
-    </function>
-
-    <function name="Color3fv" offset="14" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="3"/>
-        <glx rop="8"/>
-    </function>
-
-    <function name="Color3i" offset="15" vectorequiv="Color3iv" static_dispatch="false">
-        <param name="red" type="GLint"/>
-        <param name="green" type="GLint"/>
-        <param name="blue" type="GLint"/>
-    </function>
-
-    <function name="Color3iv" offset="16" static_dispatch="false">
-        <param name="v" type="const GLint *" count="3"/>
-        <glx rop="9"/>
-    </function>
-
-    <function name="Color3s" offset="17" vectorequiv="Color3sv" static_dispatch="false">
-        <param name="red" type="GLshort"/>
-        <param name="green" type="GLshort"/>
-        <param name="blue" type="GLshort"/>
-    </function>
-
-    <function name="Color3sv" offset="18" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="3"/>
-        <glx rop="10"/>
-    </function>
-
-    <function name="Color3ub" offset="19" vectorequiv="Color3ubv" static_dispatch="false">
-        <param name="red" type="GLubyte"/>
-        <param name="green" type="GLubyte"/>
-        <param name="blue" type="GLubyte"/>
-    </function>
-
-    <function name="Color3ubv" offset="20" static_dispatch="false">
-        <param name="v" type="const GLubyte *" count="3"/>
-        <glx rop="11"/>
-    </function>
-
-    <function name="Color3ui" offset="21" vectorequiv="Color3uiv" static_dispatch="false">
-        <param name="red" type="GLuint"/>
-        <param name="green" type="GLuint"/>
-        <param name="blue" type="GLuint"/>
-    </function>
-
-    <function name="Color3uiv" offset="22" static_dispatch="false">
-        <param name="v" type="const GLuint *" count="3"/>
-        <glx rop="12"/>
-    </function>
-
-    <function name="Color3us" offset="23" vectorequiv="Color3usv" static_dispatch="false">
-        <param name="red" type="GLushort"/>
-        <param name="green" type="GLushort"/>
-        <param name="blue" type="GLushort"/>
-    </function>
-
-    <function name="Color3usv" offset="24" static_dispatch="false">
-        <param name="v" type="const GLushort *" count="3"/>
-        <glx rop="13"/>
-    </function>
-
-    <function name="Color4b" offset="25" vectorequiv="Color4bv" static_dispatch="false">
-        <param name="red" type="GLbyte"/>
-        <param name="green" type="GLbyte"/>
-        <param name="blue" type="GLbyte"/>
-        <param name="alpha" type="GLbyte"/>
-    </function>
-
-    <function name="Color4bv" offset="26" static_dispatch="false">
-        <param name="v" type="const GLbyte *" count="4"/>
-        <glx rop="14"/>
-    </function>
-
-    <function name="Color4d" offset="27" vectorequiv="Color4dv" static_dispatch="false">
-        <param name="red" type="GLdouble"/>
-        <param name="green" type="GLdouble"/>
-        <param name="blue" type="GLdouble"/>
-        <param name="alpha" type="GLdouble"/>
-    </function>
-
-    <function name="Color4dv" offset="28" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="4"/>
-        <glx rop="15"/>
-    </function>
-
-    <!--function name="Color4f" offset="29" vectorequiv="Color4fv" static_dispatch="false">
-        <param name="red" type="GLfloat"/>
-        <param name="green" type="GLfloat"/>
-        <param name="blue" type="GLfloat"/>
-        <param name="alpha" type="GLfloat"/>
-    </function-->
-
-    <function name="Color4fv" offset="30" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="4"/>
-        <glx rop="16"/>
-    </function>
-
-    <function name="Color4i" offset="31" vectorequiv="Color4iv" static_dispatch="false">
-        <param name="red" type="GLint"/>
-        <param name="green" type="GLint"/>
-        <param name="blue" type="GLint"/>
-        <param name="alpha" type="GLint"/>
-    </function>
-
-    <function name="Color4iv" offset="32" static_dispatch="false">
-        <param name="v" type="const GLint *" count="4"/>
-        <glx rop="17"/>
-    </function>
-
-    <function name="Color4s" offset="33" vectorequiv="Color4sv" static_dispatch="false">
-        <param name="red" type="GLshort"/>
-        <param name="green" type="GLshort"/>
-        <param name="blue" type="GLshort"/>
-        <param name="alpha" type="GLshort"/>
-    </function>
-
-    <function name="Color4sv" offset="34" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="4"/>
-        <glx rop="18"/>
-    </function>
-
-    <!--function name="Color4ub" offset="35" vectorequiv="Color4ubv" static_dispatch="false">
-        <param name="red" type="GLubyte"/>
-        <param name="green" type="GLubyte"/>
-        <param name="blue" type="GLubyte"/>
-        <param name="alpha" type="GLubyte"/>
-    </function-->
-
-    <function name="Color4ubv" offset="36" static_dispatch="false">
-        <param name="v" type="const GLubyte *" count="4"/>
-        <glx rop="19"/>
-    </function>
-
-    <function name="Color4ui" offset="37" vectorequiv="Color4uiv" static_dispatch="false">
-        <param name="red" type="GLuint"/>
-        <param name="green" type="GLuint"/>
-        <param name="blue" type="GLuint"/>
-        <param name="alpha" type="GLuint"/>
-    </function>
-
-    <function name="Color4uiv" offset="38" static_dispatch="false">
-        <param name="v" type="const GLuint *" count="4"/>
-        <glx rop="20"/>
-    </function>
-
-    <function name="Color4us" offset="39" vectorequiv="Color4usv" static_dispatch="false">
-        <param name="red" type="GLushort"/>
-        <param name="green" type="GLushort"/>
-        <param name="blue" type="GLushort"/>
-        <param name="alpha" type="GLushort"/>
-    </function>
-
-    <function name="Color4usv" offset="40" static_dispatch="false">
-        <param name="v" type="const GLushort *" count="4"/>
-        <glx rop="21"/>
-    </function>
-
-    <function name="EdgeFlag" offset="41" vectorequiv="EdgeFlagv" static_dispatch="false">
-        <param name="flag" type="GLboolean"/>
-    </function>
-
-    <function name="EdgeFlagv" offset="42" static_dispatch="false">
-        <param name="flag" type="const GLboolean *" count="1"/>
-        <glx rop="22"/>
-    </function>
-
-    <function name="End" offset="43" static_dispatch="false">
-        <glx rop="23"/>
-    </function>
-
-    <function name="Indexd" offset="44" vectorequiv="Indexdv" static_dispatch="false">
-        <param name="c" type="GLdouble"/>
-    </function>
-
-    <function name="Indexdv" offset="45" static_dispatch="false">
-        <param name="c" type="const GLdouble *" count="1"/>
-        <glx rop="24"/>
-    </function>
-
-    <function name="Indexf" offset="46" vectorequiv="Indexfv" static_dispatch="false">
-        <param name="c" type="GLfloat"/>
-    </function>
-
-    <function name="Indexfv" offset="47" static_dispatch="false">
-        <param name="c" type="const GLfloat *" count="1"/>
-        <glx rop="25"/>
-    </function>
-
-    <function name="Indexi" offset="48" vectorequiv="Indexiv" static_dispatch="false">
-        <param name="c" type="GLint"/>
-    </function>
-
-    <function name="Indexiv" offset="49" static_dispatch="false">
-        <param name="c" type="const GLint *" count="1"/>
-        <glx rop="26"/>
-    </function>
-
-    <function name="Indexs" offset="50" vectorequiv="Indexsv" static_dispatch="false">
-        <param name="c" type="GLshort"/>
-    </function>
-
-    <function name="Indexsv" offset="51" static_dispatch="false">
-        <param name="c" type="const GLshort *" count="1"/>
-        <glx rop="27"/>
-    </function>
-
-    <function name="Normal3b" offset="52" vectorequiv="Normal3bv" static_dispatch="false">
-        <param name="nx" type="GLbyte"/>
-        <param name="ny" type="GLbyte"/>
-        <param name="nz" type="GLbyte"/>
-    </function>
-
-    <function name="Normal3bv" offset="53" static_dispatch="false">
-        <param name="v" type="const GLbyte *" count="3"/>
-        <glx rop="28"/>
-    </function>
-
-    <function name="Normal3d" offset="54" vectorequiv="Normal3dv" static_dispatch="false">
-        <param name="nx" type="GLdouble"/>
-        <param name="ny" type="GLdouble"/>
-        <param name="nz" type="GLdouble"/>
-    </function>
-
-    <function name="Normal3dv" offset="55" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="3"/>
-        <glx rop="29"/>
-    </function>
-
-    <!--function name="Normal3f" offset="56" vectorequiv="Normal3fv" static_dispatch="false">
-        <param name="nx" type="GLfloat"/>
-        <param name="ny" type="GLfloat"/>
-        <param name="nz" type="GLfloat"/>
-    </function-->
-
-    <function name="Normal3fv" offset="57" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="3"/>
-        <glx rop="30"/>
-    </function>
-
-    <function name="Normal3i" offset="58" vectorequiv="Normal3iv" static_dispatch="false">
-        <param name="nx" type="GLint"/>
-        <param name="ny" type="GLint"/>
-        <param name="nz" type="GLint"/>
-    </function>
-
-    <function name="Normal3iv" offset="59" static_dispatch="false">
-        <param name="v" type="const GLint *" count="3"/>
-        <glx rop="31"/>
-    </function>
-
-    <function name="Normal3s" offset="60" vectorequiv="Normal3sv" static_dispatch="false">
-        <param name="nx" type="GLshort"/>
-        <param name="ny" type="GLshort"/>
-        <param name="nz" type="GLshort"/>
-    </function>
-
-    <function name="Normal3sv" offset="61" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="3"/>
-        <glx rop="32"/>
-    </function>
-
-    <function name="RasterPos2d" offset="62" vectorequiv="RasterPos2dv" static_dispatch="false">
-        <param name="x" type="GLdouble"/>
-        <param name="y" type="GLdouble"/>
-    </function>
-
-    <function name="RasterPos2dv" offset="63" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="2"/>
-        <glx rop="33"/>
-    </function>
-
-    <function name="RasterPos2f" offset="64" vectorequiv="RasterPos2fv" static_dispatch="false">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-    </function>
-
-    <function name="RasterPos2fv" offset="65" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="2"/>
-        <glx rop="34"/>
-    </function>
-
-    <function name="RasterPos2i" offset="66" vectorequiv="RasterPos2iv" static_dispatch="false">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-    </function>
-
-    <function name="RasterPos2iv" offset="67" static_dispatch="false">
-        <param name="v" type="const GLint *" count="2"/>
-        <glx rop="35"/>
-    </function>
-
-    <function name="RasterPos2s" offset="68" vectorequiv="RasterPos2sv" static_dispatch="false">
-        <param name="x" type="GLshort"/>
-        <param name="y" type="GLshort"/>
-    </function>
-
-    <function name="RasterPos2sv" offset="69" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="2"/>
-        <glx rop="36"/>
-    </function>
-
-    <function name="RasterPos3d" offset="70" vectorequiv="RasterPos3dv" static_dispatch="false">
-        <param name="x" type="GLdouble"/>
-        <param name="y" type="GLdouble"/>
-        <param name="z" type="GLdouble"/>
-    </function>
-
-    <function name="RasterPos3dv" offset="71" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="3"/>
-        <glx rop="37"/>
-    </function>
-
-    <function name="RasterPos3f" offset="72" vectorequiv="RasterPos3fv" static_dispatch="false">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-    </function>
-
-    <function name="RasterPos3fv" offset="73" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="3"/>
-        <glx rop="38"/>
-    </function>
-
-    <function name="RasterPos3i" offset="74" vectorequiv="RasterPos3iv" static_dispatch="false">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="z" type="GLint"/>
-    </function>
-
-    <function name="RasterPos3iv" offset="75" static_dispatch="false">
-        <param name="v" type="const GLint *" count="3"/>
-        <glx rop="39"/>
-    </function>
-
-    <function name="RasterPos3s" offset="76" vectorequiv="RasterPos3sv" static_dispatch="false">
-        <param name="x" type="GLshort"/>
-        <param name="y" type="GLshort"/>
-        <param name="z" type="GLshort"/>
-    </function>
-
-    <function name="RasterPos3sv" offset="77" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="3"/>
-        <glx rop="40"/>
-    </function>
-
-    <function name="RasterPos4d" offset="78" vectorequiv="RasterPos4dv" static_dispatch="false">
-        <param name="x" type="GLdouble"/>
-        <param name="y" type="GLdouble"/>
-        <param name="z" type="GLdouble"/>
-        <param name="w" type="GLdouble"/>
-    </function>
-
-    <function name="RasterPos4dv" offset="79" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="4"/>
-        <glx rop="41"/>
-    </function>
-
-    <function name="RasterPos4f" offset="80" vectorequiv="RasterPos4fv" static_dispatch="false">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <param name="w" type="GLfloat"/>
-    </function>
-
-    <function name="RasterPos4fv" offset="81" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="4"/>
-        <glx rop="42"/>
-    </function>
-
-    <function name="RasterPos4i" offset="82" vectorequiv="RasterPos4iv" static_dispatch="false">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="z" type="GLint"/>
-        <param name="w" type="GLint"/>
-    </function>
-
-    <function name="RasterPos4iv" offset="83" static_dispatch="false">
-        <param name="v" type="const GLint *" count="4"/>
-        <glx rop="43"/>
-    </function>
-
-    <function name="RasterPos4s" offset="84" vectorequiv="RasterPos4sv" static_dispatch="false">
-        <param name="x" type="GLshort"/>
-        <param name="y" type="GLshort"/>
-        <param name="z" type="GLshort"/>
-        <param name="w" type="GLshort"/>
-    </function>
-
-    <function name="RasterPos4sv" offset="85" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="4"/>
-        <glx rop="44"/>
-    </function>
-
-    <function name="Rectd" offset="86" vectorequiv="Rectdv" static_dispatch="false">
-        <param name="x1" type="GLdouble"/>
-        <param name="y1" type="GLdouble"/>
-        <param name="x2" type="GLdouble"/>
-        <param name="y2" type="GLdouble"/>
-    </function>
-
-    <function name="Rectdv" offset="87" static_dispatch="false">
-        <param name="v1" type="const GLdouble *" count="2"/>
-        <param name="v2" type="const GLdouble *" count="2"/>
-        <glx rop="45"/>
-    </function>
-
-    <function name="Rectf" offset="88" vectorequiv="Rectfv" static_dispatch="false">
-        <param name="x1" type="GLfloat"/>
-        <param name="y1" type="GLfloat"/>
-        <param name="x2" type="GLfloat"/>
-        <param name="y2" type="GLfloat"/>
-    </function>
-
-    <function name="Rectfv" offset="89" static_dispatch="false">
-        <param name="v1" type="const GLfloat *" count="2"/>
-        <param name="v2" type="const GLfloat *" count="2"/>
-        <glx rop="46"/>
-    </function>
-
-    <function name="Recti" offset="90" vectorequiv="Rectiv" static_dispatch="false">
-        <param name="x1" type="GLint"/>
-        <param name="y1" type="GLint"/>
-        <param name="x2" type="GLint"/>
-        <param name="y2" type="GLint"/>
-    </function>
-
-    <function name="Rectiv" offset="91" static_dispatch="false">
-        <param name="v1" type="const GLint *" count="2"/>
-        <param name="v2" type="const GLint *" count="2"/>
-        <glx rop="47"/>
-    </function>
-
-    <function name="Rects" offset="92" vectorequiv="Rectsv" static_dispatch="false">
-        <param name="x1" type="GLshort"/>
-        <param name="y1" type="GLshort"/>
-        <param name="x2" type="GLshort"/>
-        <param name="y2" type="GLshort"/>
-    </function>
-
-    <function name="Rectsv" offset="93" static_dispatch="false">
-        <param name="v1" type="const GLshort *" count="2"/>
-        <param name="v2" type="const GLshort *" count="2"/>
-        <glx rop="48"/>
-    </function>
-
-    <function name="TexCoord1d" offset="94" vectorequiv="TexCoord1dv" static_dispatch="false">
-        <param name="s" type="GLdouble"/>
-    </function>
-
-    <function name="TexCoord1dv" offset="95" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="1"/>
-        <glx rop="49"/>
-    </function>
-
-    <function name="TexCoord1f" offset="96" vectorequiv="TexCoord1fv" static_dispatch="false">
-        <param name="s" type="GLfloat"/>
-    </function>
-
-    <function name="TexCoord1fv" offset="97" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="1"/>
-        <glx rop="50"/>
-    </function>
-
-    <function name="TexCoord1i" offset="98" vectorequiv="TexCoord1iv" static_dispatch="false">
-        <param name="s" type="GLint"/>
-    </function>
-
-    <function name="TexCoord1iv" offset="99" static_dispatch="false">
-        <param name="v" type="const GLint *" count="1"/>
-        <glx rop="51"/>
-    </function>
-
-    <function name="TexCoord1s" offset="100" vectorequiv="TexCoord1sv" static_dispatch="false">
-        <param name="s" type="GLshort"/>
-    </function>
-
-    <function name="TexCoord1sv" offset="101" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="1"/>
-        <glx rop="52"/>
-    </function>
-
-    <function name="TexCoord2d" offset="102" vectorequiv="TexCoord2dv" static_dispatch="false">
-        <param name="s" type="GLdouble"/>
-        <param name="t" type="GLdouble"/>
-    </function>
-
-    <function name="TexCoord2dv" offset="103" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="2"/>
-        <glx rop="53"/>
-    </function>
-
-    <function name="TexCoord2f" offset="104" vectorequiv="TexCoord2fv" static_dispatch="false">
-        <param name="s" type="GLfloat"/>
-        <param name="t" type="GLfloat"/>
-    </function>
-
-    <function name="TexCoord2fv" offset="105" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="2"/>
-        <glx rop="54"/>
-    </function>
-
-    <function name="TexCoord2i" offset="106" vectorequiv="TexCoord2iv" static_dispatch="false">
-        <param name="s" type="GLint"/>
-        <param name="t" type="GLint"/>
-    </function>
-
-    <function name="TexCoord2iv" offset="107" static_dispatch="false">
-        <param name="v" type="const GLint *" count="2"/>
-        <glx rop="55"/>
-    </function>
-
-    <function name="TexCoord2s" offset="108" vectorequiv="TexCoord2sv" static_dispatch="false">
-        <param name="s" type="GLshort"/>
-        <param name="t" type="GLshort"/>
-    </function>
-
-    <function name="TexCoord2sv" offset="109" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="2"/>
-        <glx rop="56"/>
-    </function>
-
-    <function name="TexCoord3d" offset="110" vectorequiv="TexCoord3dv" static_dispatch="false">
-        <param name="s" type="GLdouble"/>
-        <param name="t" type="GLdouble"/>
-        <param name="r" type="GLdouble"/>
-    </function>
-
-    <function name="TexCoord3dv" offset="111" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="3"/>
-        <glx rop="57"/>
-    </function>
-
-    <function name="TexCoord3f" offset="112" vectorequiv="TexCoord3fv" static_dispatch="false">
-        <param name="s" type="GLfloat"/>
-        <param name="t" type="GLfloat"/>
-        <param name="r" type="GLfloat"/>
-    </function>
-
-    <function name="TexCoord3fv" offset="113" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="3"/>
-        <glx rop="58"/>
-    </function>
-
-    <function name="TexCoord3i" offset="114" vectorequiv="TexCoord3iv" static_dispatch="false">
-        <param name="s" type="GLint"/>
-        <param name="t" type="GLint"/>
-        <param name="r" type="GLint"/>
-    </function>
-
-    <function name="TexCoord3iv" offset="115" static_dispatch="false">
-        <param name="v" type="const GLint *" count="3"/>
-        <glx rop="59"/>
-    </function>
-
-    <function name="TexCoord3s" offset="116" vectorequiv="TexCoord3sv" static_dispatch="false">
-        <param name="s" type="GLshort"/>
-        <param name="t" type="GLshort"/>
-        <param name="r" type="GLshort"/>
-    </function>
-
-    <function name="TexCoord3sv" offset="117" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="3"/>
-        <glx rop="60"/>
-    </function>
-
-    <function name="TexCoord4d" offset="118" vectorequiv="TexCoord4dv" static_dispatch="false">
-        <param name="s" type="GLdouble"/>
-        <param name="t" type="GLdouble"/>
-        <param name="r" type="GLdouble"/>
-        <param name="q" type="GLdouble"/>
-    </function>
-
-    <function name="TexCoord4dv" offset="119" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="4"/>
-        <glx rop="61"/>
-    </function>
-
-    <function name="TexCoord4f" offset="120" vectorequiv="TexCoord4fv" static_dispatch="false">
-        <param name="s" type="GLfloat"/>
-        <param name="t" type="GLfloat"/>
-        <param name="r" type="GLfloat"/>
-        <param name="q" type="GLfloat"/>
-    </function>
-
-    <function name="TexCoord4fv" offset="121" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="4"/>
-        <glx rop="62"/>
-    </function>
-
-    <function name="TexCoord4i" offset="122" vectorequiv="TexCoord4iv" static_dispatch="false">
-        <param name="s" type="GLint"/>
-        <param name="t" type="GLint"/>
-        <param name="r" type="GLint"/>
-        <param name="q" type="GLint"/>
-    </function>
-
-    <function name="TexCoord4iv" offset="123" static_dispatch="false">
-        <param name="v" type="const GLint *" count="4"/>
-        <glx rop="63"/>
-    </function>
-
-    <function name="TexCoord4s" offset="124" vectorequiv="TexCoord4sv" static_dispatch="false">
-        <param name="s" type="GLshort"/>
-        <param name="t" type="GLshort"/>
-        <param name="r" type="GLshort"/>
-        <param name="q" type="GLshort"/>
-    </function>
-
-    <function name="TexCoord4sv" offset="125" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="4"/>
-        <glx rop="64"/>
-    </function>
-
-    <function name="Vertex2d" offset="126" vectorequiv="Vertex2dv" static_dispatch="false">
-        <param name="x" type="GLdouble"/>
-        <param name="y" type="GLdouble"/>
-    </function>
-
-    <function name="Vertex2dv" offset="127" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="2"/>
-        <glx rop="65"/>
-    </function>
-
-    <function name="Vertex2f" offset="128" vectorequiv="Vertex2fv" static_dispatch="false">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-    </function>
-
-    <function name="Vertex2fv" offset="129" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="2"/>
-        <glx rop="66"/>
-    </function>
-
-    <function name="Vertex2i" offset="130" vectorequiv="Vertex2iv" static_dispatch="false">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-    </function>
-
-    <function name="Vertex2iv" offset="131" static_dispatch="false">
-        <param name="v" type="const GLint *" count="2"/>
-        <glx rop="67"/>
-    </function>
-
-    <function name="Vertex2s" offset="132" vectorequiv="Vertex2sv" static_dispatch="false">
-        <param name="x" type="GLshort"/>
-        <param name="y" type="GLshort"/>
-    </function>
-
-    <function name="Vertex2sv" offset="133" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="2"/>
-        <glx rop="68"/>
-    </function>
-
-    <function name="Vertex3d" offset="134" vectorequiv="Vertex3dv" static_dispatch="false">
-        <param name="x" type="GLdouble"/>
-        <param name="y" type="GLdouble"/>
-        <param name="z" type="GLdouble"/>
-    </function>
-
-    <function name="Vertex3dv" offset="135" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="3"/>
-        <glx rop="69"/>
-    </function>
-
-    <function name="Vertex3f" offset="136" vectorequiv="Vertex3fv" static_dispatch="false">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-    </function>
-
-    <function name="Vertex3fv" offset="137" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="3"/>
-        <glx rop="70"/>
-    </function>
-
-    <function name="Vertex3i" offset="138" vectorequiv="Vertex3iv" static_dispatch="false">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="z" type="GLint"/>
-    </function>
-
-    <function name="Vertex3iv" offset="139" static_dispatch="false">
-        <param name="v" type="const GLint *" count="3"/>
-        <glx rop="71"/>
-    </function>
-
-    <function name="Vertex3s" offset="140" vectorequiv="Vertex3sv" static_dispatch="false">
-        <param name="x" type="GLshort"/>
-        <param name="y" type="GLshort"/>
-        <param name="z" type="GLshort"/>
-    </function>
-
-    <function name="Vertex3sv" offset="141" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="3"/>
-        <glx rop="72"/>
-    </function>
-
-    <function name="Vertex4d" offset="142" vectorequiv="Vertex4dv" static_dispatch="false">
-        <param name="x" type="GLdouble"/>
-        <param name="y" type="GLdouble"/>
-        <param name="z" type="GLdouble"/>
-        <param name="w" type="GLdouble"/>
-    </function>
-
-    <function name="Vertex4dv" offset="143" static_dispatch="false">
-        <param name="v" type="const GLdouble *" count="4"/>
-        <glx rop="73"/>
-    </function>
-
-    <function name="Vertex4f" offset="144" vectorequiv="Vertex4fv" static_dispatch="false">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <param name="w" type="GLfloat"/>
-    </function>
-
-    <function name="Vertex4fv" offset="145" static_dispatch="false">
-        <param name="v" type="const GLfloat *" count="4"/>
-        <glx rop="74"/>
-    </function>
-
-    <function name="Vertex4i" offset="146" vectorequiv="Vertex4iv" static_dispatch="false">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="z" type="GLint"/>
-        <param name="w" type="GLint"/>
-    </function>
-
-    <function name="Vertex4iv" offset="147" static_dispatch="false">
-        <param name="v" type="const GLint *" count="4"/>
-        <glx rop="75"/>
-    </function>
-
-    <function name="Vertex4s" offset="148" vectorequiv="Vertex4sv" static_dispatch="false">
-        <param name="x" type="GLshort"/>
-        <param name="y" type="GLshort"/>
-        <param name="z" type="GLshort"/>
-        <param name="w" type="GLshort"/>
-    </function>
-
-    <function name="Vertex4sv" offset="149" static_dispatch="false">
-        <param name="v" type="const GLshort *" count="4"/>
-        <glx rop="76"/>
-    </function>
-
-    <function name="ClipPlane" offset="150" static_dispatch="false">
-        <param name="plane" type="GLenum"/>
-        <param name="equation" type="const GLdouble *" count="4"/>
-        <glx rop="77"/>
-    </function>
-
-    <function name="ColorMaterial" offset="151" static_dispatch="false">
-        <param name="face" type="GLenum"/>
-        <param name="mode" type="GLenum"/>
-        <glx rop="78"/>
-    </function>
-
-    <!--function name="CullFace" offset="152" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <glx rop="79"/>
-    </function>
-
-    <function name="Fogf" offset="153" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="80"/>
-    </function>
-
-    <function name="Fogfv" offset="154" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="81"/>
-    </function-->
-
-    <function name="Fogi" offset="155" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="82"/>
-    </function>
-
-    <function name="Fogiv" offset="156" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="83"/>
-    </function>
-
-    <!--function name="FrontFace" offset="157" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <glx rop="84"/>
-    </function>
-
-    <function name="Hint" offset="158" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="mode" type="GLenum"/>
-        <glx rop="85"/>
-    </function>
-
-    <function name="Lightf" offset="159" static_dispatch="false">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="86"/>
-    </function>
-
-    <function name="Lightfv" offset="160" static_dispatch="false">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="87"/>
-    </function-->
-
-    <function name="Lighti" offset="161" static_dispatch="false">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="88"/>
-    </function>
-
-    <function name="Lightiv" offset="162" static_dispatch="false">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="89"/>
-    </function>
-
-    <!--function name="LightModelf" offset="163" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="90"/>
-    </function>
-
-    <function name="LightModelfv" offset="164" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="91"/>
-    </function-->
-
-    <function name="LightModeli" offset="165" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="92"/>
-    </function>
-
-    <function name="LightModeliv" offset="166" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="93"/>
-    </function>
-
-    <function name="LineStipple" offset="167" static_dispatch="false">
-        <param name="factor" type="GLint"/>
-        <param name="pattern" type="GLushort"/>
-        <glx rop="94"/>
-    </function>
-
-    <!--function name="LineWidth" offset="168" static_dispatch="false">
-        <param name="width" type="GLfloat"/>
-        <glx rop="95"/>
-    </function>
-
-    <function name="Materialf" offset="169" static_dispatch="false">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="96"/>
-    </function>
-
-    <function name="Materialfv" offset="170" static_dispatch="false">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="97"/>
-    </function-->
-
-    <function name="Materiali" offset="171" static_dispatch="false">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="98"/>
-    </function>
-
-    <function name="Materialiv" offset="172" static_dispatch="false">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="99"/>
-    </function>
-
-    <!--function name="PointSize" offset="173" static_dispatch="false">
-        <param name="size" type="GLfloat"/>
-        <glx rop="100"/>
-    </function-->
-
-    <function name="PolygonMode" offset="174" static_dispatch="false">
-        <param name="face" type="GLenum"/>
-        <param name="mode" type="GLenum"/>
-        <glx rop="101"/>
-    </function>
-
-    <function name="PolygonStipple" offset="175" static_dispatch="false">
-        <param name="mask" type="const GLubyte *" img_width="32" img_height="32" img_format="GL_COLOR_INDEX" img_type="GL_BITMAP" img_target="0" img_pad_dimensions="false"/>
-        <glx rop="102"/>
-    </function>
-
-    <!--function name="Scissor" offset="176" static_dispatch="false">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <glx rop="103"/>
-    </function>
-
-    <function name="ShadeModel" offset="177" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <glx rop="104"/>
-    </function>
-
-    <function name="TexParameterf" offset="178" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="105"/>
-    </function>
-
-    <function name="TexParameterfv" offset="179" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="106"/>
-    </function>
-
-    <function name="TexParameteri" offset="180" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="107"/>
-    </function>
-
-    <function name="TexParameteriv" offset="181" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="108"/>
-    </function-->
-
-    <function name="TexImage1D" offset="182" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_format="format" img_type="type" img_target="target" img_send_null="true" img_pad_dimensions="true"/>
-        <glx rop="109" large="true"/>
-    </function>
-
-    <!--function name="TexImage2D" offset="183" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_format="format" img_type="type" img_target="target" img_send_null="true" img_pad_dimensions="true"/>
-        <glx rop="110" large="true"/>
-    </function>
-
-    <function name="TexEnvf" offset="184" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="111"/>
-    </function>
-
-    <function name="TexEnvfv" offset="185" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="112"/>
-    </function>
-
-    <function name="TexEnvi" offset="186" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="113"/>
-    </function>
-
-    <function name="TexEnviv" offset="187" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="114"/>
-    </function-->
-
-    <function name="TexGend" offset="188" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLdouble"/>
-        <glx rop="115"/>
-    </function>
-
-    <function name="TexGendv" offset="189" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLdouble *" variable_param="pname"/>
-        <glx rop="116"/>
-    </function>
-
-    <!--function name="TexGenf" offset="190" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="117"/>
-    </function>
-
-    <function name="TexGenfv" offset="191" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="118"/>
-    </function>
-
-    <function name="TexGeni" offset="192" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="119"/>
-    </function>
-
-    <function name="TexGeniv" offset="193" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="120"/>
-    </function-->
-
-    <function name="FeedbackBuffer" offset="194" static_dispatch="false">
-        <param name="size" type="GLsizei"/>
-        <param name="type" type="GLenum"/>
-        <param name="buffer" type="GLfloat *" output="true"/>
-        <glx sop="105" handcode="true"/>
-    </function>
-
-    <function name="SelectBuffer" offset="195" static_dispatch="false">
-        <param name="size" type="GLsizei"/>
-        <param name="buffer" type="GLuint *" output="true"/>
-        <glx sop="106" handcode="true"/>
-    </function>
-
-    <function name="RenderMode" offset="196" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <return type="GLint"/>
-        <glx sop="107" handcode="true"/>
-    </function>
-
-    <function name="InitNames" offset="197" static_dispatch="false">
-        <glx rop="121"/>
-    </function>
-
-    <function name="LoadName" offset="198" static_dispatch="false">
-        <param name="name" type="GLuint"/>
-        <glx rop="122"/>
-    </function>
-
-    <function name="PassThrough" offset="199" static_dispatch="false">
-        <param name="token" type="GLfloat"/>
-        <glx rop="123"/>
-    </function>
-
-    <function name="PopName" offset="200" static_dispatch="false">
-        <glx rop="124"/>
-    </function>
-
-    <function name="PushName" offset="201" static_dispatch="false">
-        <param name="name" type="GLuint"/>
-        <glx rop="125"/>
-    </function>
-
-    <function name="DrawBuffer" offset="202" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <glx rop="126"/>
-    </function>
-
-    <!--function name="Clear" offset="203" static_dispatch="false">
-        <param name="mask" type="GLbitfield"/>
-        <glx rop="127"/>
-    </function-->
-
-    <function name="ClearAccum" offset="204" static_dispatch="false">
-        <param name="red" type="GLfloat"/>
-        <param name="green" type="GLfloat"/>
-        <param name="blue" type="GLfloat"/>
-        <param name="alpha" type="GLfloat"/>
-        <glx rop="128"/>
-    </function>
-
-    <function name="ClearIndex" offset="205" static_dispatch="false">
-        <param name="c" type="GLfloat"/>
-        <glx rop="129"/>
-    </function>
-
-    <!--function name="ClearColor" offset="206" static_dispatch="false">
-        <param name="red" type="GLclampf"/>
-        <param name="green" type="GLclampf"/>
-        <param name="blue" type="GLclampf"/>
-        <param name="alpha" type="GLclampf"/>
-        <glx rop="130"/>
-    </function>
-
-    <function name="ClearStencil" offset="207" static_dispatch="false">
-        <param name="s" type="GLint"/>
-        <glx rop="131"/>
-    </function-->
-
-    <function name="ClearDepth" offset="208" static_dispatch="false">
-        <param name="depth" type="GLclampd"/>
-        <glx rop="132"/>
-    </function>
-
-    <!--function name="StencilMask" offset="209" static_dispatch="false">
-        <param name="mask" type="GLuint"/>
-        <glx rop="133"/>
-    </function>
-
-    <function name="ColorMask" offset="210" static_dispatch="false">
-        <param name="red" type="GLboolean"/>
-        <param name="green" type="GLboolean"/>
-        <param name="blue" type="GLboolean"/>
-        <param name="alpha" type="GLboolean"/>
-        <glx rop="134"/>
-    </function>
-
-    <function name="DepthMask" offset="211" static_dispatch="false">
-        <param name="flag" type="GLboolean"/>
-        <glx rop="135"/>
-    </function-->
-
-    <function name="IndexMask" offset="212" static_dispatch="false">
-        <param name="mask" type="GLuint"/>
-        <glx rop="136"/>
-    </function>
-
-    <function name="Accum" offset="213" static_dispatch="false">
-        <param name="op" type="GLenum"/>
-        <param name="value" type="GLfloat"/>
-        <glx rop="137"/>
-    </function>
-
-    <!--function name="Disable" offset="214" static_dispatch="false">
-        <param name="cap" type="GLenum"/>
-        <glx rop="138" handcode="client"/>
-    </function>
-
-    <function name="Enable" offset="215" static_dispatch="false">
-        <param name="cap" type="GLenum"/>
-        <glx rop="139" handcode="client"/>
-    </function>
-
-    <function name="Finish" offset="216" static_dispatch="false">
-        <glx sop="108" handcode="true"/>
-    </function>
-
-    <function name="Flush" offset="217" static_dispatch="false">
-        <glx sop="142" handcode="true"/>
-    </function-->
-
-    <function name="PopAttrib" offset="218" static_dispatch="false">
-        <glx rop="141"/>
-    </function>
-
-    <function name="PushAttrib" offset="219" static_dispatch="false">
-        <param name="mask" type="GLbitfield"/>
-        <glx rop="142"/>
-    </function>
-
-    <function name="Map1d" offset="220" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="u1" type="GLdouble"/>
-        <param name="u2" type="GLdouble"/>
-        <param name="stride" type="GLint" client_only="true"/>
-        <param name="order" type="GLint"/>
-        <param name="points" type="const GLdouble *" variable_param="order"/>
-        <glx rop="143" handcode="true"/>
-    </function>
-
-    <function name="Map1f" offset="221" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="u1" type="GLfloat"/>
-        <param name="u2" type="GLfloat"/>
-        <param name="stride" type="GLint" client_only="true"/>
-        <param name="order" type="GLint"/>
-        <param name="points" type="const GLfloat *" variable_param="order"/>
-        <glx rop="144" handcode="true"/>
-    </function>
-
-    <function name="Map2d" offset="222" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="u1" type="GLdouble"/>
-        <param name="u2" type="GLdouble"/>
-        <param name="ustride" type="GLint" client_only="true"/>
-        <param name="uorder" type="GLint"/>
-        <param name="v1" type="GLdouble"/>
-        <param name="v2" type="GLdouble"/>
-        <param name="vstride" type="GLint" client_only="true"/>
-        <param name="vorder" type="GLint"/>
-        <param name="points" type="const GLdouble *" variable_param="uorder"/>
-        <glx rop="145" handcode="true"/>
-    </function>
-
-    <function name="Map2f" offset="223" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="u1" type="GLfloat"/>
-        <param name="u2" type="GLfloat"/>
-        <param name="ustride" type="GLint" client_only="true"/>
-        <param name="uorder" type="GLint"/>
-        <param name="v1" type="GLfloat"/>
-        <param name="v2" type="GLfloat"/>
-        <param name="vstride" type="GLint" client_only="true"/>
-        <param name="vorder" type="GLint"/>
-        <param name="points" type="const GLfloat *" variable_param="uorder"/>
-        <glx rop="146" handcode="true"/>
-    </function>
-
-    <function name="MapGrid1d" offset="224" static_dispatch="false">
-        <param name="un" type="GLint"/>
-        <param name="u1" type="GLdouble"/>
-        <param name="u2" type="GLdouble"/>
-        <glx rop="147"/>
-    </function>
-
-    <function name="MapGrid1f" offset="225" static_dispatch="false">
-        <param name="un" type="GLint"/>
-        <param name="u1" type="GLfloat"/>
-        <param name="u2" type="GLfloat"/>
-        <glx rop="148"/>
-    </function>
-
-    <function name="MapGrid2d" offset="226" static_dispatch="false">
-        <param name="un" type="GLint"/>
-        <param name="u1" type="GLdouble"/>
-        <param name="u2" type="GLdouble"/>
-        <param name="vn" type="GLint"/>
-        <param name="v1" type="GLdouble"/>
-        <param name="v2" type="GLdouble"/>
-        <glx rop="149"/>
-    </function>
-
-    <function name="MapGrid2f" offset="227" static_dispatch="false">
-        <param name="un" type="GLint"/>
-        <param name="u1" type="GLfloat"/>
-        <param name="u2" type="GLfloat"/>
-        <param name="vn" type="GLint"/>
-        <param name="v1" type="GLfloat"/>
-        <param name="v2" type="GLfloat"/>
-        <glx rop="150"/>
-    </function>
-
-    <function name="EvalCoord1d" offset="228" vectorequiv="EvalCoord1dv" static_dispatch="false">
-        <param name="u" type="GLdouble"/>
-    </function>
-
-    <function name="EvalCoord1dv" offset="229" static_dispatch="false">
-        <param name="u" type="const GLdouble *" count="1"/>
-        <glx rop="151"/>
-    </function>
-
-    <function name="EvalCoord1f" offset="230" vectorequiv="EvalCoord1fv" static_dispatch="false">
-        <param name="u" type="GLfloat"/>
-    </function>
-
-    <function name="EvalCoord1fv" offset="231" static_dispatch="false">
-        <param name="u" type="const GLfloat *" count="1"/>
-        <glx rop="152"/>
-    </function>
-
-    <function name="EvalCoord2d" offset="232" vectorequiv="EvalCoord2dv" static_dispatch="false">
-        <param name="u" type="GLdouble"/>
-        <param name="v" type="GLdouble"/>
-    </function>
-
-    <function name="EvalCoord2dv" offset="233" static_dispatch="false">
-        <param name="u" type="const GLdouble *" count="2"/>
-        <glx rop="153"/>
-    </function>
-
-    <function name="EvalCoord2f" offset="234" vectorequiv="EvalCoord2fv" static_dispatch="false">
-        <param name="u" type="GLfloat"/>
-        <param name="v" type="GLfloat"/>
-    </function>
-
-    <function name="EvalCoord2fv" offset="235" static_dispatch="false">
-        <param name="u" type="const GLfloat *" count="2"/>
-        <glx rop="154"/>
-    </function>
-
-    <function name="EvalMesh1" offset="236" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <param name="i1" type="GLint"/>
-        <param name="i2" type="GLint"/>
-        <glx rop="155"/>
-    </function>
-
-    <function name="EvalPoint1" offset="237" static_dispatch="false">
-        <param name="i" type="GLint"/>
-        <glx rop="156"/>
-    </function>
-
-    <function name="EvalMesh2" offset="238" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <param name="i1" type="GLint"/>
-        <param name="i2" type="GLint"/>
-        <param name="j1" type="GLint"/>
-        <param name="j2" type="GLint"/>
-        <glx rop="157"/>
-    </function>
-
-    <function name="EvalPoint2" offset="239" static_dispatch="false">
-        <param name="i" type="GLint"/>
-        <param name="j" type="GLint"/>
-        <glx rop="158"/>
-    </function>
-
-    <!--function name="AlphaFunc" offset="240" static_dispatch="false">
-        <param name="func" type="GLenum"/>
-        <param name="ref" type="GLclampf"/>
-        <glx rop="159"/>
-    </function>
-
-    <function name="BlendFunc" offset="241" static_dispatch="false">
-        <param name="sfactor" type="GLenum"/>
-        <param name="dfactor" type="GLenum"/>
-        <glx rop="160"/>
-    </function>
-
-    <function name="LogicOp" offset="242" static_dispatch="false">
-        <param name="opcode" type="GLenum"/>
-        <glx rop="161"/>
-    </function>
-
-    <function name="StencilFunc" offset="243" static_dispatch="false">
-        <param name="func" type="GLenum"/>
-        <param name="ref" type="GLint"/>
-        <param name="mask" type="GLuint"/>
-        <glx rop="162"/>
-    </function>
-
-    <function name="StencilOp" offset="244" static_dispatch="false">
-        <param name="fail" type="GLenum"/>
-        <param name="zfail" type="GLenum"/>
-        <param name="zpass" type="GLenum"/>
-        <glx rop="163"/>
-    </function>
-
-    <function name="DepthFunc" offset="245" static_dispatch="false">
-        <param name="func" type="GLenum"/>
-        <glx rop="164"/>
-    </function-->
-
-    <function name="PixelZoom" offset="246" static_dispatch="false">
-        <param name="xfactor" type="GLfloat"/>
-        <param name="yfactor" type="GLfloat"/>
-        <glx rop="165"/>
-    </function>
-
-    <function name="PixelTransferf" offset="247" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx rop="166"/>
-    </function>
-
-    <function name="PixelTransferi" offset="248" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx rop="167"/>
-    </function>
-
-    <function name="PixelStoref" offset="249" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLfloat"/>
-        <glx sop="109" handcode="client"/>
-    </function>
-
-    <!--function name="PixelStorei" offset="250" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="param" type="GLint"/>
-        <glx sop="110" handcode="client"/>
-    </function-->
-
-    <function name="PixelMapfv" offset="251" static_dispatch="false">
-        <param name="map" type="GLenum"/>
-        <param name="mapsize" type="GLsizei" counter="true"/>
-        <param name="values" type="const GLfloat *" count="mapsize"/>
-        <glx rop="168" large="true"/>
-    </function>
-
-    <function name="PixelMapuiv" offset="252" static_dispatch="false">
-        <param name="map" type="GLenum"/>
-        <param name="mapsize" type="GLsizei" counter="true"/>
-        <param name="values" type="const GLuint *" count="mapsize"/>
-        <glx rop="169" large="true"/>
-    </function>
-
-    <function name="PixelMapusv" offset="253" static_dispatch="false">
-        <param name="map" type="GLenum"/>
-        <param name="mapsize" type="GLsizei" counter="true"/>
-        <param name="values" type="const GLushort *" count="mapsize"/>
-        <glx rop="170" large="true"/>
-    </function>
-
-    <function name="ReadBuffer" offset="254" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <glx rop="171"/>
-    </function>
-
-    <function name="CopyPixels" offset="255" static_dispatch="false">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="type" type="GLenum"/>
-        <glx rop="172"/>
-    </function>
-
-    <!--function name="ReadPixels" offset="256" static_dispatch="false">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="GLvoid *" output="true"  img_width="width" img_height="height" img_format="format" img_type="type" img_target="0"/>
-        <glx sop="111"/>
-    </function-->
-
-    <function name="DrawPixels" offset="257" static_dispatch="false">
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_format="format" img_type="type" img_target="0" img_pad_dimensions="false"/>
-        <glx rop="173" large="true"/>
-    </function>
-
-    <!--function name="GetBooleanv" offset="258" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLboolean *" output="true" variable_param="pname"/>
-        <glx sop="112" handcode="client"/>
-    </function-->
-
-    <function name="GetClipPlane" offset="259" static_dispatch="false">
-        <param name="plane" type="GLenum"/>
-        <param name="equation" type="GLdouble *" output="true" count="4"/>
-        <glx sop="113" always_array="true"/>
-    </function>
-
-    <function name="GetDoublev" offset="260" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLdouble *" output="true" variable_param="pname"/>
-        <glx sop="114" handcode="client"/>
-    </function>
-
-    <!--function name="GetError" offset="261" static_dispatch="false">
-        <return type="GLenum"/>
-        <glx sop="115" handcode="client"/>
-    </function>
-
-    <function name="GetFloatv" offset="262" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="116" handcode="client"/>
-    </function>
-
-    <function name="GetIntegerv" offset="263" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="117" handcode="client"/>
-    </function>
-
-    <function name="GetLightfv" offset="264" static_dispatch="false">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="118"/>
-    </function-->
-
-    <function name="GetLightiv" offset="265" static_dispatch="false">
-        <param name="light" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="119"/>
-    </function>
-
-    <function name="GetMapdv" offset="266" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="query" type="GLenum"/>
-        <param name="v" type="GLdouble *" output="true" variable_param="target query"/>
-        <glx sop="120"/>
-    </function>
-
-    <function name="GetMapfv" offset="267" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="query" type="GLenum"/>
-        <param name="v" type="GLfloat *" output="true" variable_param="target query"/>
-        <glx sop="121"/>
-    </function>
-
-    <function name="GetMapiv" offset="268" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="query" type="GLenum"/>
-        <param name="v" type="GLint *" output="true" variable_param="target query"/>
-        <glx sop="122"/>
-    </function>
-
-    <!--function name="GetMaterialfv" offset="269" static_dispatch="false">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="123"/>
-    </function-->
-
-    <function name="GetMaterialiv" offset="270" static_dispatch="false">
-        <param name="face" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="124"/>
-    </function>
-
-    <function name="GetPixelMapfv" offset="271" static_dispatch="false">
-        <param name="map" type="GLenum"/>
-        <param name="values" type="GLfloat *" output="true" variable_param="map"/>
-        <glx sop="125"/>
-    </function>
-
-    <function name="GetPixelMapuiv" offset="272" static_dispatch="false">
-        <param name="map" type="GLenum"/>
-        <param name="values" type="GLuint *" output="true" variable_param="map"/>
-        <glx sop="126"/>
-    </function>
-
-    <function name="GetPixelMapusv" offset="273" static_dispatch="false">
-        <param name="map" type="GLenum"/>
-        <param name="values" type="GLushort *" output="true" variable_param="map"/>
-        <glx sop="127"/>
-    </function>
-
-    <function name="GetPolygonStipple" offset="274" static_dispatch="false">
-        <param name="mask" type="GLubyte *" output="true" img_width="32" img_height="32" img_format="GL_COLOR_INDEX" img_type="GL_BITMAP"/>
-        <glx sop="128"/>
-    </function>
-
-    <!--function name="GetString" offset="275" static_dispatch="false">
-        <param name="name" type="GLenum"/>
-        <return type="const GLubyte *"/>
-        <glx sop="129" handcode="true"/>
-    </function>
-
-    <function name="GetTexEnvfv" offset="276" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="130"/>
-    </function>
-
-    <function name="GetTexEnviv" offset="277" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="131"/>
-    </function-->
-
-    <function name="GetTexGendv" offset="278" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLdouble *" output="true" variable_param="pname"/>
-        <glx sop="132"/>
-    </function>
-
-    <!--function name="GetTexGenfv" offset="279" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="133"/>
-    </function>
-
-    <function name="GetTexGeniv" offset="280" static_dispatch="false">
-        <param name="coord" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="134"/>
-    </function-->
-
-    <function name="GetTexImage" offset="281" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="GLvoid *" output="true" img_width="width" img_height="height" img_depth="depth" img_format="format" img_type="type"/>
-        <glx sop="135" dimensions_in_reply="true"/>
-    </function>
-
-    <!--function name="GetTexParameterfv" offset="282" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="136"/>
-    </function>
-
-    <function name="GetTexParameteriv" offset="283" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="137"/>
-    </function-->
-
-    <function name="GetTexLevelParameterfv" offset="284" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="138"/>
-    </function>
-
-    <function name="GetTexLevelParameteriv" offset="285" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="139"/>
-    </function>
-
-    <!--function name="IsEnabled" offset="286" static_dispatch="false">
-        <param name="cap" type="GLenum"/>
-        <return type="GLboolean"/>
-        <glx sop="140" handcode="client"/>
-    </function-->
-
-    <function name="IsList" offset="287" static_dispatch="false">
-        <param name="list" type="GLuint"/>
-        <return type="GLboolean"/>
-        <glx sop="141"/>
-    </function>
-
-    <function name="DepthRange" offset="288" static_dispatch="false">
-        <param name="zNear" type="GLclampd"/>
-        <param name="zFar" type="GLclampd"/>
-        <glx rop="174"/>
-    </function>
-
-    <function name="Frustum" offset="289" static_dispatch="false">
-        <param name="left" type="GLdouble"/>
-        <param name="right" type="GLdouble"/>
-        <param name="bottom" type="GLdouble"/>
-        <param name="top" type="GLdouble"/>
-        <param name="zNear" type="GLdouble"/>
-        <param name="zFar" type="GLdouble"/>
-        <glx rop="175"/>
-    </function>
-
-    <!--function name="LoadIdentity" offset="290" static_dispatch="false">
-        <glx rop="176"/>
-    </function>
-
-    <function name="LoadMatrixf" offset="291" static_dispatch="false">
-        <param name="m" type="const GLfloat *" count="16"/>
-        <glx rop="177"/>
-    </function-->
-
-    <function name="LoadMatrixd" offset="292" static_dispatch="false">
-        <param name="m" type="const GLdouble *" count="16"/>
-        <glx rop="178"/>
-    </function>
-
-    <!--function name="MatrixMode" offset="293" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <glx rop="179"/>
-    </function>
-
-    <function name="MultMatrixf" offset="294" static_dispatch="false">
-        <param name="m" type="const GLfloat *" count="16"/>
-        <glx rop="180"/>
-    </function-->
-
-    <function name="MultMatrixd" offset="295" static_dispatch="false">
-        <param name="m" type="const GLdouble *" count="16"/>
-        <glx rop="181"/>
-    </function>
-
-    <function name="Ortho" offset="296" static_dispatch="false">
-        <param name="left" type="GLdouble"/>
-        <param name="right" type="GLdouble"/>
-        <param name="bottom" type="GLdouble"/>
-        <param name="top" type="GLdouble"/>
-        <param name="zNear" type="GLdouble"/>
-        <param name="zFar" type="GLdouble"/>
-        <glx rop="182"/>
-    </function>
-
-    <!--function name="PopMatrix" offset="297" static_dispatch="false">
-        <glx rop="183"/>
-    </function>
-
-    <function name="PushMatrix" offset="298" static_dispatch="false">
-        <glx rop="184"/>
-    </function-->
-
-    <function name="Rotated" offset="299" static_dispatch="false">
-        <param name="angle" type="GLdouble"/>
-        <param name="x" type="GLdouble"/>
-        <param name="y" type="GLdouble"/>
-        <param name="z" type="GLdouble"/>
-        <glx rop="185"/>
-    </function>
-
-    <!--function name="Rotatef" offset="300" static_dispatch="false">
-        <param name="angle" type="GLfloat"/>
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <glx rop="186"/>
-    </function-->
-
-    <function name="Scaled" offset="301" static_dispatch="false">
-        <param name="x" type="GLdouble"/>
-        <param name="y" type="GLdouble"/>
-        <param name="z" type="GLdouble"/>
-        <glx rop="187"/>
-    </function>
-
-    <!--function name="Scalef" offset="302" static_dispatch="false">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <glx rop="188"/>
-    </function-->
-
-    <function name="Translated" offset="303" static_dispatch="false">
-        <param name="x" type="GLdouble"/>
-        <param name="y" type="GLdouble"/>
-        <param name="z" type="GLdouble"/>
-        <glx rop="189"/>
-    </function>
-
-    <!--function name="Translatef" offset="304" static_dispatch="false">
-        <param name="x" type="GLfloat"/>
-        <param name="y" type="GLfloat"/>
-        <param name="z" type="GLfloat"/>
-        <glx rop="190"/>
-    </function>
-
-    <function name="Viewport" offset="305" static_dispatch="false">
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <glx rop="191"/>
-    </function-->
-</category>
-
-<category name="1.1">
-    <function name="ArrayElement" offset="306" static_dispatch="false">
-        <param name="i" type="GLint"/>
-        <glx handcode="true"/>
-    </function>
-
-    <!--function name="ColorPointer" offset="308" static_dispatch="false">
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="DisableClientState" offset="309" static_dispatch="false">
-        <param name="array" type="GLenum"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="DrawArrays" offset="310" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <param name="first" type="GLint"/>
-        <param name="count" type="GLsizei"/>
-        <glx rop="193" handcode="true"/>
-    </function>
-
-    <function name="DrawElements" offset="311" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <param name="count" type="GLsizei"/>
-        <param name="type" type="GLenum"/>
-        <param name="indices" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function-->
-
-    <function name="EdgeFlagPointer" offset="312" static_dispatch="false">
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <!--function name="EnableClientState" offset="313" static_dispatch="false">
-        <param name="array" type="GLenum"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="GetPointerv" offset="329" static_dispatch="false">
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLvoid **" output="true"/>
-        <glx handcode="true"/>
-    </function-->
-
-    <function name="IndexPointer" offset="314" static_dispatch="false">
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="InterleavedArrays" offset="317" static_dispatch="false">
-        <param name="format" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <!--function name="NormalPointer" offset="318" static_dispatch="false">
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="TexCoordPointer" offset="320" static_dispatch="false">
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="VertexPointer" offset="321" static_dispatch="false">
-        <param name="size" type="GLint"/>
-        <param name="type" type="GLenum"/>
-        <param name="stride" type="GLsizei"/>
-        <param name="pointer" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="PolygonOffset" offset="319" static_dispatch="false">
-        <param name="factor" type="GLfloat"/>
-        <param name="units" type="GLfloat"/>
-        <glx rop="192"/>
-    </function-->
-
-    <function name="CopyTexImage1D" offset="323" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <glx rop="4119"/>
-    </function>
-
-    <!--function name="CopyTexImage2D" offset="324" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <glx rop="4120"/>
-    </function-->
-
-    <function name="CopyTexSubImage1D" offset="325" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <glx rop="4121"/>
-    </function>
-
-    <!--function name="CopyTexSubImage2D" offset="326" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <glx rop="4122"/>
-    </function-->
-
-    <function name="TexSubImage1D" offset="332" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="UNUSED" type="GLuint" padding="true"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_xoff="xoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/>
-        <glx rop="4099" large="true"/>
-    </function>
-
-    <!--function name="TexSubImage2D" offset="333" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="UNUSED" type="GLuint" padding="true"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_xoff="xoffset" img_yoff="yoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/>
-        <glx rop="4100" large="true"/>
-    </function-->
-
-    <function name="AreTexturesResident" offset="322" static_dispatch="false">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="textures" type="const GLuint *" count="n"/>
-        <param name="residences" type="GLboolean *" output="true" count="n"/>
-        <return type="GLboolean"/>
-        <glx sop="143" handcode="client" always_array="true"/>
-    </function>
-
-    <!--function name="BindTexture" offset="307" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="texture" type="GLuint"/>
-        <glx rop="4117"/>
-    </function>
-
-    <function name="DeleteTextures" offset="327" static_dispatch="false">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="textures" type="const GLuint *" count="n"/>
-        <glx sop="144"/>
-    </function>
-
-    <function name="GenTextures" offset="328" static_dispatch="false">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="textures" type="GLuint *" output="true" count="n"/>
-        <glx sop="145" always_array="true"/>
-    </function>
-
-    <function name="IsTexture" offset="330" static_dispatch="false">
-        <param name="texture" type="GLuint"/>
-        <return type="GLboolean"/>
-        <glx sop="146"/>
-    </function-->
-
-    <function name="PrioritizeTextures" offset="331" static_dispatch="false">
-        <param name="n" type="GLsizei" counter="true"/>
-        <param name="textures" type="const GLuint *" count="n"/>
-        <param name="priorities" type="const GLclampf *" count="n"/>
-        <glx rop="4118"/>
-    </function>
-
-    <function name="Indexub" offset="315" vectorequiv="Indexubv" static_dispatch="false">
-        <param name="c" type="GLubyte"/>
-    </function>
-
-    <function name="Indexubv" offset="316" static_dispatch="false">
-        <param name="c" type="const GLubyte *" count="1"/>
-        <glx rop="194"/>
-    </function>
-
-    <function name="PopClientAttrib" offset="334" static_dispatch="false">
-        <glx handcode="true"/>
-    </function>
-
-    <function name="PushClientAttrib" offset="335" static_dispatch="false">
-        <param name="mask" type="GLbitfield"/>
-        <glx handcode="true"/>
-    </function>
-</category>
-
-<category name="1.2">
-    <!--function name="BlendColor" offset="336" static_dispatch="false">
-        <param name="red" type="GLclampf"/>
-        <param name="green" type="GLclampf"/>
-        <param name="blue" type="GLclampf"/>
-        <param name="alpha" type="GLclampf"/>
-        <glx rop="4096"/>
-    </function>
-
-    <function name="BlendEquation" offset="337" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <glx rop="4097"/>
-    </function-->
-
-    <function name="DrawRangeElements" offset="338" static_dispatch="false">
-        <param name="mode" type="GLenum"/>
-        <param name="start" type="GLuint"/>
-        <param name="end" type="GLuint"/>
-        <param name="count" type="GLsizei"/>
-        <param name="type" type="GLenum"/>
-        <param name="indices" type="const GLvoid *"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="ColorTable" offset="339" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="width" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="table" type="const GLvoid *" img_width="width" img_pad_dimensions="false" img_format="format" img_type="type" img_target="target"/>
-        <glx rop="2053" large="true"/>
-    </function>
-
-    <function name="ColorTableParameterfv" offset="340" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="2054"/>
-    </function>
-
-    <function name="ColorTableParameteriv" offset="341" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="2055"/>
-    </function>
-
-    <function name="CopyColorTable" offset="342" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <glx rop="2056"/>
-    </function>
-
-    <function name="GetColorTable" offset="343" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="table" type="GLvoid *" output="true" img_width="width" img_format="format" img_type="type"/>
-        <glx sop="147" dimensions_in_reply="true"/>
-    </function>
-
-    <function name="GetColorTableParameterfv" offset="344" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="148"/>
-    </function>
-
-    <function name="GetColorTableParameteriv" offset="345" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="149"/>
-    </function>
-
-    <function name="ColorSubTable" offset="346" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="start" type="GLsizei"/>
-        <param name="count" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="data" type="const GLvoid *" img_width="count" img_pad_dimensions="false" img_format="format" img_type="type" img_target="target"/>
-        <glx rop="195" large="true"/>
-    </function>
-
-    <function name="CopyColorSubTable" offset="347" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="start" type="GLsizei"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <glx rop="196"/>
-    </function>
-
-    <function name="ConvolutionFilter1D" offset="348" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="width" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="image" type="const GLvoid *" img_width="width" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/>
-        <glx rop="4101" large="true"/>
-    </function>
-
-    <function name="ConvolutionFilter2D" offset="349" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="image" type="const GLvoid *" img_width="width" img_height="height" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/>
-        <glx rop="4102" large="true"/>
-    </function>
-
-    <function name="ConvolutionParameterf" offset="350" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat"/>
-        <glx rop="4103"/>
-    </function>
-
-    <function name="ConvolutionParameterfv" offset="351" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLfloat *" variable_param="pname"/>
-        <glx rop="4104"/>
-    </function>
-
-    <function name="ConvolutionParameteri" offset="352" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint"/>
-        <glx rop="4105"/>
-    </function>
-
-    <function name="ConvolutionParameteriv" offset="353" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="const GLint *" variable_param="pname"/>
-        <glx rop="4106"/>
-    </function>
-
-    <function name="CopyConvolutionFilter1D" offset="354" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <glx rop="4107"/>
-    </function>
-
-    <function name="CopyConvolutionFilter2D" offset="355" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <glx rop="4108"/>
-    </function>
-
-    <function name="GetConvolutionFilter" offset="356" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="image" type="GLvoid *" output="true" img_width="width" img_height="height" img_format="format" img_type="type"/>
-        <glx sop="150" dimensions_in_reply="true"/>
-    </function>
-
-    <function name="GetConvolutionParameterfv" offset="357" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="151"/>
-    </function>
-
-    <function name="GetConvolutionParameteriv" offset="358" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="152"/>
-    </function>
-
-    <function name="GetSeparableFilter" offset="359" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="row" type="GLvoid *" output="true"/>
-        <param name="column" type="GLvoid *" output="true"/>
-        <param name="span" type="GLvoid *" output="true"/>
-        <glx sop="153" handcode="true"/>
-    </function>
-
-    <function name="SeparableFilter2D" offset="360" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="row" type="const GLvoid *"/>
-        <param name="column" type="const GLvoid *"/>
-        <glx rop="4109" handcode="true"/>
-    </function>
-
-    <function name="GetHistogram" offset="361" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="reset" type="GLboolean"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="values" type="GLvoid *" output="true" img_width="width" img_format="format" img_type="type"/>
-        <glx sop="154" dimensions_in_reply="true" img_reset="reset"/>
-    </function>
-
-    <function name="GetHistogramParameterfv" offset="362" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="155"/>
-    </function>
-
-    <function name="GetHistogramParameteriv" offset="363" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="156"/>
-    </function>
-
-    <function name="GetMinmax" offset="364" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="reset" type="GLboolean"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="values" type="GLvoid *" output="true" img_width="2" img_format="format" img_type="type"/>
-        <glx sop="157" img_reset="reset"/>
-    </function>
-
-    <function name="GetMinmaxParameterfv" offset="365" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLfloat *" output="true" variable_param="pname"/>
-        <glx sop="158"/>
-    </function>
-
-    <function name="GetMinmaxParameteriv" offset="366" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLint *" output="true" variable_param="pname"/>
-        <glx sop="159"/>
-    </function>
-
-    <function name="Histogram" offset="367" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="width" type="GLsizei"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="sink" type="GLboolean"/>
-        <glx rop="4110"/>
-    </function>
-
-    <function name="Minmax" offset="368" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="internalformat" type="GLenum"/>
-        <param name="sink" type="GLboolean"/>
-        <glx rop="4111"/>
-    </function>
-
-    <function name="ResetHistogram" offset="369" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <glx rop="4112"/>
-    </function>
-
-    <function name="ResetMinmax" offset="370" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <glx rop="4113"/>
-    </function>
-
-    <!--function name="TexImage3D" offset="371" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="internalformat" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="depth" type="GLsizei"/>
-        <param name="border" type="GLint"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_format="format" img_type="type" img_target="target" img_null_flag="true" img_pad_dimensions="true"/>
-        <glx rop="4114" large="true"/>
-    </function>
-
-    <function name="TexSubImage3D" offset="372" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="zoffset" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <param name="depth" type="GLsizei"/>
-        <param name="format" type="GLenum"/>
-        <param name="type" type="GLenum"/>
-        <param name="UNUSED" type="GLuint" padding="true"/>
-        <param name="pixels" type="const GLvoid *" img_width="width" img_height="height" img_depth="depth" img_xoff="xoffset" img_yoff="yoffset" img_zoff="zoffset" img_format="format" img_type="type" img_target="target" img_pad_dimensions="true"/>
-        <glx rop="4115" large="true"/>
-    </function>
-
-    <function name="CopyTexSubImage3D" offset="373" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="level" type="GLint"/>
-        <param name="xoffset" type="GLint"/>
-        <param name="yoffset" type="GLint"/>
-        <param name="zoffset" type="GLint"/>
-        <param name="x" type="GLint"/>
-        <param name="y" type="GLint"/>
-        <param name="width" type="GLsizei"/>
-        <param name="height" type="GLsizei"/>
-        <glx rop="4123"/>
-    </function-->
-</category>
-
-<category name="GL_ARB_multitexture" number="1">
-    <!--function name="ActiveTextureARB" offset="374" static_dispatch="false">
-        <param name="texture" type="GLenum"/>
-        <glx rop="197"/>
-    </function>
-
-    <function name="ClientActiveTextureARB" offset="375" static_dispatch="false">
-        <param name="texture" type="GLenum"/>
-        <glx handcode="true"/>
-    </function-->
-
-    <function name="MultiTexCoord1dARB" offset="376" vectorequiv="MultiTexCoord1dvARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLdouble"/>
-    </function>
-
-    <function name="MultiTexCoord1dvARB" offset="377" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLdouble *" count="1"/>
-        <glx rop="198"/>
-    </function>
-
-    <function name="MultiTexCoord1fARB" offset="378" vectorequiv="MultiTexCoord1fvARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLfloat"/>
-    </function>
-
-    <function name="MultiTexCoord1fvARB" offset="379" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLfloat *" count="1"/>
-        <glx rop="199"/>
-    </function>
-
-    <function name="MultiTexCoord1iARB" offset="380" vectorequiv="MultiTexCoord1ivARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLint"/>
-    </function>
-
-    <function name="MultiTexCoord1ivARB" offset="381" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLint *" count="1"/>
-        <glx rop="200"/>
-    </function>
-
-    <function name="MultiTexCoord1sARB" offset="382" vectorequiv="MultiTexCoord1svARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLshort"/>
-    </function>
-
-    <function name="MultiTexCoord1svARB" offset="383" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLshort *" count="1"/>
-        <glx rop="201"/>
-    </function>
-
-    <function name="MultiTexCoord2dARB" offset="384" vectorequiv="MultiTexCoord2dvARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLdouble"/>
-        <param name="t" type="GLdouble"/>
-    </function>
-
-    <function name="MultiTexCoord2dvARB" offset="385" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLdouble *" count="2"/>
-        <glx rop="202"/>
-    </function>
-
-    <function name="MultiTexCoord2fARB" offset="386" vectorequiv="MultiTexCoord2fvARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLfloat"/>
-        <param name="t" type="GLfloat"/>
-    </function>
-
-    <function name="MultiTexCoord2fvARB" offset="387" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLfloat *" count="2"/>
-        <glx rop="203"/>
-    </function>
-
-    <function name="MultiTexCoord2iARB" offset="388" vectorequiv="MultiTexCoord2ivARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLint"/>
-        <param name="t" type="GLint"/>
-    </function>
-
-    <function name="MultiTexCoord2ivARB" offset="389" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLint *" count="2"/>
-        <glx rop="204"/>
-    </function>
-
-    <function name="MultiTexCoord2sARB" offset="390" vectorequiv="MultiTexCoord2svARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLshort"/>
-        <param name="t" type="GLshort"/>
-    </function>
-
-    <function name="MultiTexCoord2svARB" offset="391" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLshort *" count="2"/>
-        <glx rop="205"/>
-    </function>
-
-    <function name="MultiTexCoord3dARB" offset="392" vectorequiv="MultiTexCoord3dvARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLdouble"/>
-        <param name="t" type="GLdouble"/>
-        <param name="r" type="GLdouble"/>
-    </function>
-
-    <function name="MultiTexCoord3dvARB" offset="393" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLdouble *" count="3"/>
-        <glx rop="206"/>
-    </function>
-
-    <function name="MultiTexCoord3fARB" offset="394" vectorequiv="MultiTexCoord3fvARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLfloat"/>
-        <param name="t" type="GLfloat"/>
-        <param name="r" type="GLfloat"/>
-    </function>
-
-    <function name="MultiTexCoord3fvARB" offset="395" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLfloat *" count="3"/>
-        <glx rop="207"/>
-    </function>
-
-    <function name="MultiTexCoord3iARB" offset="396" vectorequiv="MultiTexCoord3ivARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLint"/>
-        <param name="t" type="GLint"/>
-        <param name="r" type="GLint"/>
-    </function>
-
-    <function name="MultiTexCoord3ivARB" offset="397" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLint *" count="3"/>
-        <glx rop="208"/>
-    </function>
-
-    <function name="MultiTexCoord3sARB" offset="398" vectorequiv="MultiTexCoord3svARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLshort"/>
-        <param name="t" type="GLshort"/>
-        <param name="r" type="GLshort"/>
-    </function>
-
-    <function name="MultiTexCoord3svARB" offset="399" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLshort *" count="3"/>
-        <glx rop="209"/>
-    </function>
-
-    <function name="MultiTexCoord4dARB" offset="400" vectorequiv="MultiTexCoord4dvARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLdouble"/>
-        <param name="t" type="GLdouble"/>
-        <param name="r" type="GLdouble"/>
-        <param name="q" type="GLdouble"/>
-    </function>
-
-    <function name="MultiTexCoord4dvARB" offset="401" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLdouble *" count="4"/>
-        <glx rop="210"/>
-    </function>
-
-    <!--function name="MultiTexCoord4fARB" offset="402" vectorequiv="MultiTexCoord4fvARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLfloat"/>
-        <param name="t" type="GLfloat"/>
-        <param name="r" type="GLfloat"/>
-        <param name="q" type="GLfloat"/>
-    </function-->
-
-    <function name="MultiTexCoord4fvARB" offset="403" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLfloat *" count="4"/>
-        <glx rop="211"/>
-    </function>
-
-    <function name="MultiTexCoord4iARB" offset="404" vectorequiv="MultiTexCoord4ivARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLint"/>
-        <param name="t" type="GLint"/>
-        <param name="r" type="GLint"/>
-        <param name="q" type="GLint"/>
-    </function>
-
-    <function name="MultiTexCoord4ivARB" offset="405" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLint *" count="4"/>
-        <glx rop="212"/>
-    </function>
-
-    <function name="MultiTexCoord4sARB" offset="406" vectorequiv="MultiTexCoord4svARB" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="s" type="GLshort"/>
-        <param name="t" type="GLshort"/>
-        <param name="r" type="GLshort"/>
-        <param name="q" type="GLshort"/>
-    </function>
-
-    <function name="MultiTexCoord4svARB" offset="407" static_dispatch="false">
-        <param name="target" type="GLenum"/>
-        <param name="v" type="const GLshort *" count="4"/>
-        <glx rop="213"/>
-    </function>
-</category>
-
-<xi:include href="../gen/APPLE_vertex_array_object.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-
-</OpenGLAPI>
diff --git a/src/mapi/glapi/gen-es/es_EXT.xml b/src/mapi/glapi/gen-es/es_EXT.xml
deleted file mode 100644
index 0013df87e82..00000000000
--- a/src/mapi/glapi/gen-es/es_EXT.xml
+++ /dev/null
@@ -1,125 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE OpenGLAPI SYSTEM "../gen/gl_API.dtd">
-
-<!-- OpenGL ES extensions -->
-
-<OpenGLAPI>
-
-<category name="GL_OES_compressed_paletted_texture" number="6">
-    <enum name="PALETTE4_RGB8_OES"                        value="0x8B90"/>
-    <enum name="PALETTE4_RGBA8_OES"                       value="0x8B91"/>
-    <enum name="PALETTE4_R5_G6_B5_OES"                    value="0x8B92"/>
-    <enum name="PALETTE4_RGBA4_OES"                       value="0x8B93"/>
-    <enum name="PALETTE4_RGB5_A1_OES"                     value="0x8B94"/>
-    <enum name="PALETTE8_RGB8_OES"                        value="0x8B95"/>
-    <enum name="PALETTE8_RGBA8_OES"                       value="0x8B96"/>
-    <enum name="PALETTE8_R5_G6_B5_OES"                    value="0x8B97"/>
-    <enum name="PALETTE8_RGBA4_OES"                       value="0x8B98"/>
-    <enum name="PALETTE8_RGB5_A1_OES"                     value="0x8B99"/>
-</category>
-
-<!-- 23. GL_OES_EGL_image -->
-<xi:include href="../gen/OES_EGL_image.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-
-<category name="GL_OES_depth24" number="24">
-    <enum name="DEPTH_COMPONENT24_OES"                    value="0x81A6"/>
-</category>
-
-<category name="GL_OES_depth32" number="25">
-    <enum name="DEPTH_COMPONENT32_OES"                    value="0x81A7"/>
-</category>
-
-<category name="GL_OES_element_index_uint" number="26">
-    <!-- No new functions, types, enums. -->
-</category>
-
-<category name="GL_OES_fbo_render_mipmap" number="27">
-    <!-- No new functions, types, enums. -->
-</category>
-
-<category name="GL_OES_mapbuffer" number="29">
-    <enum name="WRITE_ONLY_OES"                           value="0x88B9"/>
-    <enum name="BUFFER_ACCESS_OES"                        value="0x88BB"/>
-    <enum name="BUFFER_MAPPED_OES"                        value="0x88BC"/>
-    <enum name="BUFFER_MAP_POINTER_OES"                   value="0x88BD"/>
-
-    <function name="GetBufferPointervOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="pname" type="GLenum"/>
-        <param name="params" type="GLvoid **"/>
-    </function>
-
-    <function name="MapBufferOES" offset="assign">
-        <param name="target" type="GLenum"/>
-        <param name="access" type="GLenum"/>
-	<return type="GLvoid *"/>
-    </function>
-
-    <function name="UnmapBufferOES" offset="assign">
-        <param name="target" type="GLenum"/>
-	<return type="GLboolean"/>
-    </function>
-</category>
-
-<category name="GL_OES_rgb8_rgba8" number="30">
-    <enum name="RGB8_OES"                                 value="0x8051"/>
-    <enum name="RGBA8_OES"                                value="0x8058"/>
-</category>
-
-<category name="GL_OES_stencil1" number="31">
-    <enum name="STENCIL_INDEX1_OES"                       value="0x8D46"/>
-</category>
-
-<category name="GL_OES_stencil4" number="32">
-    <enum name="STENCIL_INDEX4_OES"                       value="0x8D47"/>
-</category>
-
-<category name="GL_OES_stencil8" number="33">
-    <enum name="STENCIL_INDEX8_OES"                       value="0x8D48"/>
-</category>
-
-<category name="GL_EXT_texture_filter_anisotropic" number="41">
-    <enum name="TEXTURE_MAX_ANISOTROPY_EXT"               value="0x84FE"/>
-    <enum name="MAX_TEXTURE_MAX_ANISOTROPY_EXT"           value="0x84FF"/>
-</category>
-
-<category name="GL_EXT_texture_compression_dxt1" number="49">
-    <enum name="COMPRESSED_RGB_S3TC_DXT1_EXT"             value="0x83F0"/>
-    <enum name="COMPRESSED_RGBA_S3TC_DXT1_EXT"            value="0x83F1"/>
-</category>
-
-<category name="GL_EXT_texture_format_BGRA8888" number="51">
-    <enum name="BGRA_EXT"                              value="0x80E1"/>
-</category>
-
-<category name="GL_EXT_blend_minmax" number="65">
-    <enum name="MIN_EXT"                               value="0x8007"/>
-    <enum name="MAX_EXT"                               value="0x8008"/>
-</category>
-
-<category name="GL_EXT_read_format_bgra" number="66">
-    <enum name="BGRA_EXT"                              value="0x80E1"/>
-    <enum name="UNSIGNED_SHORT_4_4_4_4_REV_EXT"        value="0x8365"/>
-    <enum name="UNSIGNED_SHORT_1_5_5_5_REV_EXT"        value="0x8366"/>
-</category>
-
-<category name="GL_EXT_multi_draw_arrays" number="69">
-    <function name="MultiDrawArraysEXT" offset="assign">
-        <param name="mode" type="GLenum"/>
-        <param name="first" type="const GLint *"/>
-        <param name="count" type="const GLsizei *"/>
-        <param name="primcount" type="GLsizei"/>
-        <glx handcode="true"/>
-    </function>
-
-    <function name="MultiDrawElementsEXT" offset="assign">
-        <param name="mode" type="GLenum"/>
-        <param name="count" type="const GLsizei *"/>
-        <param name="type" type="GLenum"/>
-        <param name="indices" type="const GLvoid **"/>
-        <param name="primcount" type="GLsizei"/>
-        <glx handcode="true"/>
-    </function>
-</category>
-
-</OpenGLAPI>
diff --git a/src/mapi/glapi/gen-es/gl_compare.py b/src/mapi/glapi/gen-es/gl_compare.py
deleted file mode 100644
index 6b5e43bb98b..00000000000
--- a/src/mapi/glapi/gen-es/gl_compare.py
+++ /dev/null
@@ -1,354 +0,0 @@
-#!/usr/bin/python
-#
-# Copyright (C) 2009 Chia-I Wu <olv@0xlab.org>
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# on the rights to use, copy, modify, merge, publish, distribute, sub
-# license, and/or sell copies of the Software, and to permit persons to whom
-# the Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
-# IBM AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-import sys
-import os.path
-import getopt
-
-GLAPI = "../../glapi/gen"
-sys.path.append(GLAPI)
-
-import gl_XML
-import glX_XML
-
-class ApiSet(object):
-    def __init__(self, api, elts=["enum", "type", "function"]):
-        self.api = api
-        self.elts = elts
-
-    def _check_enum(self, e1, e2, strict=True):
-        if e1.name != e2.name:
-            raise ValueError("%s: name mismatch" % e1.name)
-        if e1.value != e2.value:
-            raise ValueError("%s: value 0x%04x != 0x%04x"
-                    % (e1.name, e1.value, e2.value))
-
-    def _check_type(self, t1, t2, strict=True):
-        if t1.name != t2.name:
-            raise ValueError("%s: name mismatch" % t1.name)
-        if t1.type_expr.string() != t2.type_expr.string():
-            raise ValueError("%s: type %s != %s"
-                    % (t1.name, t1.type_expr.string(), t2.type_expr.string()))
-
-    def _check_function(self, f1, f2, strict=True):
-        if f1.name != f2.name:
-            raise ValueError("%s: name mismatch" % f1.name)
-        if f1.return_type != f2.return_type:
-            raise ValueError("%s: return type %s != %s"
-                    % (f1.name, f1.return_type, f2.return_type))
-        # there might be padded parameters
-        if strict and len(f1.parameters) != len(f2.parameters):
-            raise ValueError("%s: parameter length %d != %d"
-                    % (f1.name, len(f1.parameters), len(f2.parameters)))
-        if f1.assign_offset != f2.assign_offset:
-            if ((f1.assign_offset and f2.offset < 0) or
-                (f2.assign_offset and f1.offset < 0)):
-                raise ValueError("%s: assign offset %d != %d"
-                        % (f1.name, f1.assign_offset, f2.assign_offset))
-        elif not f1.assign_offset:
-            if f1.offset != f2.offset:
-                raise ValueError("%s: offset %d != %d"
-                        % (f1.name, f1.offset, f2.offset))
-
-        if strict:
-            l1 = f1.entry_points
-            l2 = f2.entry_points
-            l1.sort()
-            l2.sort()
-            if l1 != l2:
-                raise ValueError("%s: entry points %s != %s"
-                        % (f1.name, l1, l2))
-
-            l1 = f1.static_entry_points
-            l2 = f2.static_entry_points
-            l1.sort()
-            l2.sort()
-            if l1 != l2:
-                raise ValueError("%s: static entry points %s != %s"
-                        % (f1.name, l1, l2))
-
-        pad = 0
-        for i in xrange(len(f1.parameters)):
-            p1 = f1.parameters[i]
-            p2 = f2.parameters[i + pad]
-
-            if not strict and p1.is_padding != p2.is_padding:
-                if p1.is_padding:
-                    pad -= 1
-                    continue
-                else:
-                    pad += 1
-                    p2 = f2.parameters[i + pad]
-
-            if strict and p1.name != p2.name:
-                raise ValueError("%s: parameter %d name %s != %s"
-                        % (f1.name, i, p1.name, p2.name))
-            if p1.type_expr.string() != p2.type_expr.string():
-                if (strict or
-                    # special case
-                    f1.name == "TexImage2D" and p1.name != "internalformat"):
-                    raise ValueError("%s: parameter %s type %s != %s"
-                            % (f1.name, p1.name, p1.type_expr.string(),
-                               p2.type_expr.string()))
-
-    def union(self, other):
-        union = gl_XML.gl_api(None)
-
-        if "enum" in self.elts:
-            union.enums_by_name = other.enums_by_name.copy()
-            for key, val in self.api.enums_by_name.iteritems():
-                if key not in union.enums_by_name:
-                    union.enums_by_name[key] = val
-                else:
-                    self._check_enum(val, other.enums_by_name[key])
-
-        if "type" in self.elts:
-            union.types_by_name = other.types_by_name.copy()
-            for key, val in self.api.types_by_name.iteritems():
-                if key not in union.types_by_name:
-                    union.types_by_name[key] = val
-                else:
-                    self._check_type(val, other.types_by_name[key])
-
-        if "function" in self.elts:
-            union.functions_by_name = other.functions_by_name.copy()
-            for key, val in self.api.functions_by_name.iteritems():
-                if key not in union.functions_by_name:
-                    union.functions_by_name[key] = val
-                else:
-                    self._check_function(val, other.functions_by_name[key])
-
-        return union
-
-    def intersection(self, other):
-        intersection = gl_XML.gl_api(None)
-
-        if "enum" in self.elts:
-            for key, val in self.api.enums_by_name.iteritems():
-                if key in other.enums_by_name:
-                    self._check_enum(val, other.enums_by_name[key])
-                    intersection.enums_by_name[key] = val
-
-        if "type" in self.elts:
-            for key, val in self.api.types_by_name.iteritems():
-                if key in other.types_by_name:
-                    self._check_type(val, other.types_by_name[key])
-                    intersection.types_by_name[key] = val
-
-        if "function" in self.elts:
-            for key, val in self.api.functions_by_name.iteritems():
-                if key in other.functions_by_name:
-                    self._check_function(val, other.functions_by_name[key])
-                    intersection.functions_by_name[key] = val
-
-        return intersection
-
-    def difference(self, other):
-        difference = gl_XML.gl_api(None)
-
-        if "enum" in self.elts:
-            for key, val in self.api.enums_by_name.iteritems():
-                if key not in other.enums_by_name:
-                    difference.enums_by_name[key] = val
-                else:
-                    self._check_enum(val, other.enums_by_name[key])
-
-        if "type" in self.elts:
-            for key, val in self.api.types_by_name.iteritems():
-                if key not in other.types_by_name:
-                    difference.types_by_name[key] = val
-                else:
-                    self._check_type(val, other.types_by_name[key])
-
-        if "function" in self.elts:
-            for key, val in self.api.functions_by_name.iteritems():
-                if key not in other.functions_by_name:
-                    difference.functions_by_name[key] = val
-                else:
-                    self._check_function(val, other.functions_by_name[key], False)
-
-        return difference
-
-def cmp_enum(e1, e2):
-    if e1.value < e2.value:
-        return -1
-    elif e1.value > e2.value:
-        return 1
-    else:
-        return 0
-
-def cmp_type(t1, t2):
-    return t1.size - t2.size
-
-def cmp_function(f1, f2):
-    if f1.name > f2.name:
-        return 1
-    elif f1.name < f2.name:
-        return -1
-    else:
-        return 0
-
-def spaces(n, str=""):
-    spaces = n - len(str)
-    if spaces < 1:
-        spaces = 1
-    return " " * spaces
-
-def output_enum(e, indent=0):
-    attrs = 'name="%s"' % e.name
-    if e.default_count > 0:
-        tab = spaces(37, attrs)
-        attrs += '%scount="%d"' % (tab, e.default_count)
-    tab = spaces(48, attrs)
-    val = "%04x" % e.value
-    val = "0x" + val.upper()
-    attrs += '%svalue="%s"' % (tab, val)
-
-    # no child
-    if not e.functions:
-        print '%s<enum %s/>' % (spaces(indent), attrs)
-        return
-
-    print '%s<enum %s>' % (spaces(indent), attrs)
-    for key, val in e.functions.iteritems():
-        attrs = 'name="%s"' % key
-        if val[0] != e.default_count:
-            attrs += ' count="%d"' % val[0]
-        if not val[1]:
-            attrs += ' mode="get"'
-
-        print '%s<size %s/>' % (spaces(indent * 2), attrs)
-
-    print '%s</enum>' % spaces(indent)
-
-def output_type(t, indent=0):
-    tab = spaces(16, t.name)
-    attrs = 'name="%s"%ssize="%d"' % (t.name, tab, t.size)
-    ctype = t.type_expr.string()
-    if ctype.find("unsigned") != -1:
-        attrs += ' unsigned="true"'
-    elif ctype.find("signed") == -1:
-        attrs += ' float="true"'
-    print '%s<type %s/>' % (spaces(indent), attrs)
-
-def output_function(f, indent=0):
-    attrs = 'name="%s"' % f.name
-    if f.offset > 0:
-        if f.assign_offset:
-            attrs += ' offset="assign"'
-        else:
-            attrs += ' offset="%d"' % f.offset
-    print '%s<function %s>' % (spaces(indent), attrs)
-
-    for p in f.parameters:
-        attrs = 'name="%s" type="%s"' \
-                % (p.name, p.type_expr.original_string)
-        print '%s<param %s/>' % (spaces(indent * 2), attrs)
-    if f.return_type != "void":
-        attrs = 'type="%s"' % f.return_type
-        print '%s<return %s/>' % (spaces(indent * 2), attrs)
-
-    print '%s</function>' % spaces(indent)
-
-def output_category(api, indent=0):
-    enums = api.enums_by_name.values()
-    enums.sort(cmp_enum)
-    types = api.types_by_name.values()
-    types.sort(cmp_type)
-    functions = api.functions_by_name.values()
-    functions.sort(cmp_function)
-
-    for e in enums:
-        output_enum(e, indent)
-    if enums and types:
-        print
-    for t in types:
-        output_type(t, indent)
-    if enums or types:
-        print
-    for f in functions:
-        output_function(f, indent)
-        if f != functions[-1]:
-            print
-
-def is_api_empty(api):
-    return bool(not api.enums_by_name and
-                not api.types_by_name and
-                not api.functions_by_name)
-
-def show_usage(ops):
-    print "Usage: %s [-k elts] <%s> <file1> <file2>" % (sys.argv[0], "|".join(ops))
-    print "    -k elts   A comma separated string of types of elements to"
-    print "              skip.  Possible types are enum, type, and function."
-    sys.exit(1)
-
-def main():
-    ops = ["union", "intersection", "difference"]
-    elts = ["enum", "type", "function"]
-
-    try:
-        options, args = getopt.getopt(sys.argv[1:], "k:")
-    except Exception, e:
-        show_usage(ops)
-
-    if len(args) != 3:
-        show_usage(ops)
-    op, file1, file2 = args
-    if op not in ops:
-        show_usage(ops)
-
-    skips = []
-    for opt, val in options:
-        if opt == "-k":
-            skips = val.split(",")
-
-    for elt in skips:
-        try:
-            elts.remove(elt)
-        except ValueError:
-            show_usage(ops)
-
-    api1 = gl_XML.parse_GL_API(file1, glX_XML.glx_item_factory())
-    api2 = gl_XML.parse_GL_API(file2, glX_XML.glx_item_factory())
-
-    set = ApiSet(api1, elts)
-    func = getattr(set, op)
-    result = func(api2)
-
-    if not is_api_empty(result):
-        cat_name = "%s_of_%s_and_%s" \
-                % (op, os.path.basename(file1), os.path.basename(file2))
-
-        print '<?xml version="1.0"?>'
-        print '<!DOCTYPE OpenGLAPI SYSTEM "%s/gl_API.dtd">' % GLAPI
-        print
-        print '<OpenGLAPI>'
-        print
-        print '<category name="%s">' % (cat_name)
-        output_category(result, 4)
-        print '</category>'
-        print
-        print '</OpenGLAPI>'
-
-if __name__ == "__main__":
-    main()
diff --git a/src/mapi/glapi/gen-es/gl_parse_header.py b/src/mapi/glapi/gen-es/gl_parse_header.py
deleted file mode 100644
index 5382eba35c3..00000000000
--- a/src/mapi/glapi/gen-es/gl_parse_header.py
+++ /dev/null
@@ -1,450 +0,0 @@
-#!/usr/bin/python
-#
-# Copyright (C) 2009 Chia-I Wu <olv@0xlab.org>
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# on the rights to use, copy, modify, merge, publish, distribute, sub
-# license, and/or sell copies of the Software, and to permit persons to whom
-# the Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
-# IBM AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-import sys
-import os.path
-import getopt
-import re
-
-GLAPI = "../../glapi/gen"
-sys.path.append(GLAPI)
-
-class HeaderParser(object):
-    """Parser for GL header files."""
-
-    def __init__(self, verbose=0):
-        # match #if and #ifdef
-        self.IFDEF = re.compile('#\s*if(n?def\s+(?P<ifdef>\w+)|\s+(?P<if>.+))')
-        # match #endif
-        self.ENDIF = re.compile('#\s*endif')
-        # match typedef abc def;
-        self.TYPEDEF = re.compile('typedef\s+(?P<from>[\w ]+)\s+(?P<to>\w+);')
-        # match #define XYZ VAL
-        self.DEFINE = re.compile('#\s*define\s+(?P<key>\w+)(?P<value>\s+[\w"]*)?')
-        # match GLAPI
-        self.GLAPI = re.compile('^GL_?API(CALL)?\s+(?P<return>[\w\s*]+[\w*])\s+(GL)?_?APIENTRY\s+(?P<name>\w+)\s*\((?P<params>[\w\s(,*\[\])]+)\)\s*;')
-
-        self.split_params = re.compile('\s*,\s*')
-        self.split_ctype = re.compile('(\W)')
-        # ignore GL_VERSION_X_Y
-        self.ignore_enum = re.compile('GL(_ES)?_VERSION(_ES_C[ML])?_\d_\d')
-
-        self.verbose = verbose
-        self._reset()
-
-    def _reset(self):
-        """Reset to initial state."""
-        self.ifdef_levels = []
-        self.need_char = False
-
-    # use typeexpr?
-    def _format_ctype(self, ctype, fix=True):
-        """Format a ctype string, optionally fix it."""
-        # split the type string
-        tmp = self.split_ctype.split(ctype)
-        tmp = [s for s in tmp if s and s != " "]
-
-        pretty = ""
-        for i in xrange(len(tmp)):
-            # add missing GL prefix
-            if (fix and tmp[i] != "const" and tmp[i] != "*" and
-                not tmp[i].startswith("GL")):
-                tmp[i] = "GL" + tmp[i]
-
-            if i == 0:
-                pretty = tmp[i]
-            else:
-                sep = " "
-                if tmp[i - 1] == "*":
-                    sep = ""
-                pretty += sep + tmp[i]
-        return pretty
-
-    # use typeexpr?
-    def _get_ctype_attrs(self, ctype):
-        """Get the attributes of a ctype."""
-        is_float = (ctype.find("float") != -1 or ctype.find("double") != -1)
-        is_signed = not (ctype.find("unsigned")  != -1)
-
-        size = 0
-        if ctype.find("char") != -1:
-            size = 1
-        elif ctype.find("short") != -1:
-            size = 2
-        elif ctype.find("int") != -1:
-            size = 4
-        elif is_float:
-            if ctype.find("float") != -1:
-                size = 4
-            else:
-                size = 8
-
-        return (size, is_float, is_signed)
-
-    def _parse_define(self, line):
-        """Parse a #define line for an <enum>."""
-        m = self.DEFINE.search(line)
-        if not m:
-            if self.verbose and line.find("#define") >= 0:
-                print "ignore %s" % (line)
-            return None
-
-        key = m.group("key").strip()
-        val = m.group("value").strip()
-
-        # enum must begin with GL_ and be all uppercase
-        if ((not (key.startswith("GL_") and key.isupper())) or
-            (self.ignore_enum.match(key) and val == "1")):
-            if self.verbose:
-                print "ignore enum %s" % (key)
-            return None
-
-        return (key, val)
-
-    def _parse_typedef(self, line):
-        """Parse a typedef line for a <type>."""
-        m = self.TYPEDEF.search(line)
-        if not m:
-            if self.verbose and line.find("typedef") >= 0:
-                print "ignore %s" % (line)
-            return None
-
-        f = m.group("from").strip()
-        t = m.group("to").strip()
-        if not t.startswith("GL"):
-            if self.verbose:
-                print "ignore type %s" % (t)
-            return None
-        attrs = self._get_ctype_attrs(f)
-
-        return (f, t, attrs)
-
-    def _parse_gl_api(self, line):
-        """Parse a GLAPI line for a <function>."""
-        m = self.GLAPI.search(line)
-        if not m:
-            if self.verbose and line.find("APIENTRY") >= 0:
-                print "ignore %s" % (line)
-            return None
-
-        rettype = m.group("return")
-        rettype = self._format_ctype(rettype)
-        if rettype == "GLvoid":
-            rettype = ""
-
-        name = m.group("name")
-
-        param_str = m.group("params")
-        chunks = self.split_params.split(param_str)
-        chunks = [s.strip() for s in chunks]
-        if len(chunks) == 1 and (chunks[0] == "void" or chunks[0] == "GLvoid"):
-            chunks = []
-
-        params = []
-        for c in chunks:
-            # split type and variable name
-            idx = c.rfind("*")
-            if idx < 0:
-                idx = c.rfind(" ")
-            if idx >= 0:
-                idx += 1
-                ctype = c[:idx]
-                var = c[idx:]
-            else:
-                ctype = c
-                var = "unnamed"
-
-            # convert array to pointer
-            idx = var.find("[")
-            if idx >= 0:
-                var = var[:idx]
-                ctype += "*"
-
-            ctype = self._format_ctype(ctype)
-            var = var.strip()
-
-            if not self.need_char and ctype.find("GLchar") >= 0:
-                self.need_char = True
-
-            params.append((ctype, var))
-
-        return (rettype, name, params)
-
-    def _change_level(self, line):
-        """Parse a #ifdef line and change level."""
-        m = self.IFDEF.search(line)
-        if m:
-            ifdef = m.group("ifdef")
-            if not ifdef:
-                ifdef = m.group("if")
-            self.ifdef_levels.append(ifdef)
-            return True
-        m = self.ENDIF.search(line)
-        if m:
-            self.ifdef_levels.pop()
-            return True
-        return False
-
-    def _read_header(self, header):
-        """Open a header file and read its contents."""
-        lines = []
-        try:
-            fp = open(header, "rb")
-            lines = fp.readlines()
-            fp.close()
-        except IOError, e:
-            print "failed to read %s: %s" % (header, e)
-        return lines
-
-    def _cmp_enum(self, enum1, enum2):
-        """Compare two enums."""
-        # sort by length of the values as strings
-        val1 = enum1[1]
-        val2 = enum2[1]
-        ret = len(val1) - len(val2)
-        # sort by the values
-        if not ret:
-            val1 = int(val1, 16)
-            val2 = int(val2, 16)
-            ret = val1 - val2
-            # in case int cannot hold the result
-            if ret > 0:
-                ret = 1
-            elif ret < 0:
-                ret = -1
-        # sort by the names
-        if not ret:
-            if enum1[0] < enum2[0]:
-                ret = -1
-            elif enum1[0] > enum2[0]:
-                ret = 1
-        return ret
-
-    def _cmp_type(self, type1, type2):
-        """Compare two types."""
-        attrs1 = type1[2]
-        attrs2 = type2[2]
-        # sort by type size
-        ret = attrs1[0] - attrs2[0]
-        # float is larger
-        if not ret:
-            ret = attrs1[1] - attrs2[1]
-        # signed is larger
-        if not ret:
-            ret = attrs1[2] - attrs2[2]
-        # reverse
-        ret = -ret
-        return ret
-
-    def _cmp_function(self, func1, func2):
-        """Compare two functions."""
-        name1 = func1[1]
-        name2 = func2[1]
-        ret = 0
-        # sort by the names
-        if name1 < name2:
-            ret = -1
-        elif name1 > name2:
-            ret = 1
-        return ret
-
-    def _postprocess_dict(self, hdict):
-        """Post-process a header dict and return an ordered list."""
-        hlist = []
-        largest = 0
-        for key, cat in hdict.iteritems():
-            size = len(cat["enums"]) + len(cat["types"]) + len(cat["functions"])
-            # ignore empty category
-            if not size:
-                continue
-
-            cat["enums"].sort(self._cmp_enum)
-            # remove duplicates
-            dup = []
-            for i in xrange(1, len(cat["enums"])):
-                if cat["enums"][i] == cat["enums"][i - 1]:
-                    dup.insert(0, i)
-            for i in dup:
-                e = cat["enums"].pop(i)
-                if self.verbose:
-                    print "remove duplicate enum %s" % e[0]
-
-            cat["types"].sort(self._cmp_type)
-            cat["functions"].sort(self._cmp_function)
-
-            # largest category comes first
-            if size > largest:
-                hlist.insert(0, (key, cat))
-                largest = size
-            else:
-                hlist.append((key, cat))
-        return hlist
-
-    def parse(self, header):
-        """Parse a header file."""
-        self._reset()
-
-        if self.verbose:
-            print "Parsing %s" % (header)
-
-        hdict = {}
-        lines = self._read_header(header)
-        for line in lines:
-            if self._change_level(line):
-                continue
-
-            # skip until the first ifdef (i.e. __gl_h_)
-            if not self.ifdef_levels:
-                continue
-
-            cat_name = os.path.basename(header)
-            # check if we are in an extension
-            if (len(self.ifdef_levels) > 1 and
-                self.ifdef_levels[-1].startswith("GL_")):
-                cat_name = self.ifdef_levels[-1]
-
-            try:
-                cat = hdict[cat_name]
-            except KeyError:
-                cat = {
-                        "enums": [],
-                        "types": [],
-                        "functions": []
-                }
-                hdict[cat_name] = cat
-
-            key = "enums"
-            elem = self._parse_define(line)
-            if not elem:
-                key = "types"
-                elem = self._parse_typedef(line)
-            if not elem:
-                key = "functions"
-                elem = self._parse_gl_api(line)
-
-            if elem:
-                cat[key].append(elem)
-
-        if self.need_char:
-            if self.verbose:
-                print "define GLchar"
-            elem = self._parse_typedef("typedef char GLchar;")
-            cat["types"].append(elem)
-        return self._postprocess_dict(hdict)
-
-def spaces(n, str=""):
-    spaces = n - len(str)
-    if spaces < 1:
-        spaces = 1
-    return " " * spaces
-
-def output_xml(name, hlist):
-    """Output a parsed header in OpenGLAPI XML."""
-
-    for i in xrange(len(hlist)):
-        cat_name, cat = hlist[i]
-
-        print '<category name="%s">' % (cat_name)
-        indent = 4
-
-        for enum in cat["enums"]:
-            name = enum[0][3:]
-            value = enum[1]
-            tab = spaces(41, name)
-            attrs = 'name="%s"%svalue="%s"' % (name, tab, value)
-            print '%s<enum %s/>' % (spaces(indent), attrs)
-
-        if cat["enums"] and cat["types"]:
-            print
-
-        for type in cat["types"]:
-            ctype = type[0]
-            size, is_float, is_signed = type[2]
-
-            attrs = 'name="%s"' % (type[1][2:])
-            attrs += spaces(16, attrs) + 'size="%d"' % (size)
-            if is_float:
-                attrs += ' float="true"'
-            elif not is_signed:
-                attrs += ' unsigned="true"'
-
-            print '%s<type %s/>' % (spaces(indent), attrs)
-
-        for func in cat["functions"]:
-            print
-            ret = func[0]
-            name = func[1][2:]
-            params = func[2]
-
-            attrs = 'name="%s" offset="assign"' % name
-            print '%s<function %s>' % (spaces(indent), attrs)
-
-            for param in params:
-                attrs = 'name="%s" type="%s"' % (param[1], param[0])
-                print '%s<param %s/>' % (spaces(indent * 2), attrs)
-            if ret:
-                attrs = 'type="%s"' % ret
-                print '%s<return %s/>' % (spaces(indent * 2), attrs)
-
-            print '%s</function>' % spaces(indent)
-
-        print '</category>'
-        print
-
-def show_usage():
-    print "Usage: %s [-v] <header> ..." % sys.argv[0]
-    sys.exit(1)
-
-def main():
-    try:
-        args, headers = getopt.getopt(sys.argv[1:], "v")
-    except Exception, e:
-        show_usage()
-    if not headers:
-        show_usage()
-
-    verbose = 0
-    for arg in args:
-        if arg[0] == "-v":
-            verbose += 1
-
-    need_xml_header = True
-    parser = HeaderParser(verbose)
-    for h in headers:
-        h = os.path.abspath(h)
-        hlist = parser.parse(h)
-
-        if need_xml_header:
-            print '<?xml version="1.0"?>'
-            print '<!DOCTYPE OpenGLAPI SYSTEM "%s/gl_API.dtd">' % GLAPI
-            need_xml_header = False
-
-        print
-        print '<!-- %s -->' % (h)
-        print '<OpenGLAPI>'
-        print
-        output_xml(h, hlist)
-        print '</OpenGLAPI>'
-
-if __name__ == '__main__':
-    main()

From f5e757ea60d9abb848d98af01e1986be3e35e236 Mon Sep 17 00:00:00 2001
From: Cooper Yuan <cooperyuan@gmail.com>
Date: Sun, 14 Aug 2011 15:14:17 +0800
Subject: [PATCH 291/600] Destroy context in dri2/glx driver when apps call
 eglDestroyContext

---
 src/egl/drivers/dri2/egl_dri2.c | 21 +++++++++++++++++++--
 src/egl/drivers/glx/egl_glx.c   | 19 +++++++++++++++++++
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 9a37ea4bbfc..f09ae14f10c 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -725,6 +725,23 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
    return NULL;
 }
 
+/**
+ * Called via eglDestroyContext(), drv->API.DestroyContext().
+ */
+static EGLBoolean
+dri2_destroy_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx)
+{
+   struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+
+   if (_eglPutContext(ctx)) {
+      dri2_dpy->core->destroyContext(dri2_ctx->dri_context);
+      free(dri2_ctx);
+   }
+
+   return EGL_TRUE;
+}
+
 /**
  * Called via eglMakeCurrent(), drv->API.MakeCurrent().
  */
@@ -765,9 +782,8 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
          drv->API.DestroySurface(drv, disp, old_dsurf);
       if (old_rsurf)
          drv->API.DestroySurface(drv, disp, old_rsurf);
-      /* no destroy? */
       if (old_ctx)
-         _eglPutContext(old_ctx);
+         drv->API.DestroyContext(drv, disp, old_ctx);
 
       return EGL_TRUE;
    } else {
@@ -1348,6 +1364,7 @@ _EGL_MAIN(const char *args)
    dri2_drv->base.API.Initialize = dri2_initialize;
    dri2_drv->base.API.Terminate = dri2_terminate;
    dri2_drv->base.API.CreateContext = dri2_create_context;
+   dri2_drv->base.API.DestroyContext = dri2_destroy_context;
    dri2_drv->base.API.MakeCurrent = dri2_make_current;
    dri2_drv->base.API.GetProcAddress = dri2_get_proc_address;
    dri2_drv->base.API.WaitClient = dri2_wait_client;
diff --git a/src/egl/drivers/glx/egl_glx.c b/src/egl/drivers/glx/egl_glx.c
index 7cf8f4d5514..042936f960e 100644
--- a/src/egl/drivers/glx/egl_glx.c
+++ b/src/egl/drivers/glx/egl_glx.c
@@ -713,6 +713,24 @@ GLX_eglCreateContext(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
    return &GLX_ctx->Base;
 }
 
+/**
+ * Called via eglDestroyContext(), drv->API.DestroyContext().
+ */
+static EGLBoolean
+GLX_eglDestroyContext(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx)
+{
+   struct GLX_egl_driver *GLX_drv = GLX_egl_driver(drv);
+   struct GLX_egl_context *GLX_ctx = GLX_egl_context(ctx);
+
+   if (_eglPutContext(ctx)) {
+      assert(GLX_ctx);
+      GLX_drv->glXDestroyContext(disp, ctx);
+
+      free(GLX_ctx);
+   }
+
+   return EGL_TRUE;
+}
 
 /**
  * Destroy a surface.  The display is allowed to be uninitialized.
@@ -1142,6 +1160,7 @@ _EGL_MAIN(const char *args)
    GLX_drv->Base.API.Initialize = GLX_eglInitialize;
    GLX_drv->Base.API.Terminate = GLX_eglTerminate;
    GLX_drv->Base.API.CreateContext = GLX_eglCreateContext;
+   GLX_drv->Base.API.DestroyContext = GLX_eglDestroyContext;
    GLX_drv->Base.API.MakeCurrent = GLX_eglMakeCurrent;
    GLX_drv->Base.API.CreateWindowSurface = GLX_eglCreateWindowSurface;
    GLX_drv->Base.API.CreatePixmapSurface = GLX_eglCreatePixmapSurface;

From 85fe9484945cb57ffd49df248b0e5057eba6af04 Mon Sep 17 00:00:00 2001
From: Benjamin Franzke <benjaminfranzke@googlemail.com>
Date: Tue, 9 Aug 2011 14:23:18 +0200
Subject: [PATCH 292/600] egl: Native Display autodetection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

EGL doesnt define howto manage different native platforms.
So mesa has a builtime configurable default platform,
whith non-standard envvar (EGL_PLATFORM) overwrites.
This caused unneeded bugreports, when EGL_PLATFORM was forgotten.

Detection is grouped into basic types of NativeDisplays (which itself
needs to be detected).  The final decision is based on characteristcs
of these basic types:

  File Desciptor based platforms (fbdev):
    - fstat(2) to check for being a fd that belongs to a character device
    - check kernel subsystem (todo)

  Pointer to structuctures (x11, wayland, drm/gbm):
    - mincore(2) to check whether its valid pointer to some memory.
    - magic elements (e.g. pointers to exported symbols):
      o wayland display stores interface type pointer (first elm.)
      o gbm stores pointer to its constructor (first elm.)
      o x11 as a fallback (FIXME?)

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 configure.ac              |   2 +
 src/egl/main/Makefile     |  13 +++++
 src/egl/main/eglapi.c     |   8 +--
 src/egl/main/egldisplay.c | 100 ++++++++++++++++++++++++++++++++++++--
 src/egl/main/egldisplay.h |   2 +-
 5 files changed, 117 insertions(+), 8 deletions(-)

diff --git a/configure.ac b/configure.ac
index 1b1823a211f..9195da9b3a3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1370,6 +1370,8 @@ if test "x$enable_egl" = xyes; then
     EGL_LIB_DEPS="$DLOPEN_LIBS $SELINUX_LIBS -lpthread"
     EGL_DRIVERS_DIRS=""
 
+    AC_CHECK_FUNC(mincore, [DEFINES="$DEFINES -DHAVE_MINCORE"])
+
     if test "$enable_static" != yes; then
         # build egl_glx when libGL is built
         if test "x$enable_glx" = xyes; then
diff --git a/src/egl/main/Makefile b/src/egl/main/Makefile
index 775fbbe178b..c100fbfeb9c 100644
--- a/src/egl/main/Makefile
+++ b/src/egl/main/Makefile
@@ -93,6 +93,19 @@ ifeq ($(firstword $(EGL_PLATFORMS)),fbdev)
 EGL_NATIVE_PLATFORM=_EGL_PLATFORM_FBDEV
 endif
 
+ifneq ($(findstring x11, $(EGL_PLATFORMS)),)
+LOCAL_CFLAGS += -DHAVE_X11_PLATFORM
+endif
+ifneq ($(findstring wayland, $(EGL_PLATFORMS)),)
+LOCAL_CFLAGS += -DHAVE_WAYLAND_PLATFORM
+endif
+ifneq ($(findstring drm, $(EGL_PLATFORMS)),)
+LOCAL_CFLAGS += -DHAVE_DRM_PLATFORM
+endif
+ifneq ($(findstring fbdev, $(EGL_PLATFORMS)),)
+LOCAL_CFLAGS += -DHAVE_FBDEV_PLATFORM
+endif
+
 LOCAL_CFLAGS += \
 	-D_EGL_NATIVE_PLATFORM=$(EGL_NATIVE_PLATFORM) \
 	-D_EGL_DRIVER_SEARCH_DIR=\"$(EGL_DRIVER_INSTALL_DIR)\"
diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c
index 0ba7794e2c9..5d186c60e5a 100644
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -301,7 +301,7 @@ _eglUnlockDisplay(_EGLDisplay *dpy)
 EGLDisplay EGLAPIENTRY
 eglGetDisplay(EGLNativeDisplayType nativeDisplay)
 {
-   _EGLPlatformType plat = _eglGetNativePlatform();
+   _EGLPlatformType plat = _eglGetNativePlatform(nativeDisplay);
    _EGLDisplay *dpy = _eglFindDisplay(plat, (void *) nativeDisplay);
    return _eglGetDisplayHandle(dpy);
 }
@@ -538,7 +538,7 @@ eglCreateWindowSurface(EGLDisplay dpy, EGLConfig config,
    EGLSurface ret;
 
    _EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv);
-   if (disp->Platform != _eglGetNativePlatform())
+   if (disp->Platform != _eglGetNativePlatform(disp->PlatformDisplay))
       RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_WINDOW, EGL_NO_SURFACE);
 
    surf = drv->API.CreateWindowSurface(drv, disp, conf, window, attrib_list);
@@ -559,7 +559,7 @@ eglCreatePixmapSurface(EGLDisplay dpy, EGLConfig config,
    EGLSurface ret;
 
    _EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv);
-   if (disp->Platform != _eglGetNativePlatform())
+   if (disp->Platform != _eglGetNativePlatform(disp->PlatformDisplay))
       RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_PIXMAP, EGL_NO_SURFACE);
 
    surf = drv->API.CreatePixmapSurface(drv, disp, conf, pixmap, attrib_list);
@@ -720,7 +720,7 @@ eglCopyBuffers(EGLDisplay dpy, EGLSurface surface, EGLNativePixmapType target)
    EGLBoolean ret;
 
    _EGL_CHECK_SURFACE(disp, surf, EGL_FALSE, drv);
-   if (disp->Platform != _eglGetNativePlatform())
+   if (disp->Platform != _eglGetNativePlatform(disp->PlatformDisplay))
       RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_PIXMAP, EGL_FALSE);
    ret = drv->API.CopyBuffers(drv, disp, surf, target);
 
diff --git a/src/egl/main/egldisplay.c b/src/egl/main/egldisplay.c
index 60f31777272..2849dd96c64 100644
--- a/src/egl/main/egldisplay.c
+++ b/src/egl/main/egldisplay.c
@@ -43,6 +43,23 @@
 #include "eglmutex.h"
 #include "egllog.h"
 
+/* Includes for _eglNativePlatformDetectNativeDisplay */
+#ifdef HAVE_MINCORE
+#include <unistd.h>
+#include <sys/mman.h>
+#endif
+#ifdef HAVE_WAYLAND_PLATFORM
+#include <wayland-client.h>
+#endif
+#ifdef HAVE_DRM_PLATFORM
+#include <gbm.h>
+#endif
+#ifdef HAVE_FBDEV_PLATFORM
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#endif
+
 
 /**
  * Return the native platform by parsing EGL_PLATFORM.
@@ -83,18 +100,95 @@ _eglGetNativePlatformFromEnv(void)
 }
 
 
+/**
+ * Perform validity checks on a generic pointer.
+ */
+static EGLBoolean
+_eglPointerIsDereferencable(void *p)
+{
+#ifdef HAVE_MINCORE
+   uintptr_t addr = (uintptr_t) p;
+   unsigned char valid = 0;
+   const long page_size = getpagesize();
+
+   if (p == NULL)
+      return EGL_FALSE;
+
+   /* align addr to page_size */
+   addr &= ~(page_size - 1);
+
+   if (mincore((void *) addr, page_size, &valid) < 0) {
+      _eglLog(_EGL_DEBUG, "mincore failed: %m");
+      return EGL_FALSE;
+   }
+
+   return (valid & 0x01) == 0x01;
+#else
+   return p != NULL;
+#endif
+}
+
+
+/**
+ * Try detecting native platform with the help of native display characteristcs.
+ */
+static _EGLPlatformType
+_eglNativePlatformDetectNativeDisplay(EGLNativeDisplayType nativeDisplay)
+{
+#ifdef HAVE_FBDEV_PLATFORM
+   struct stat buf;
+#endif
+
+   if (nativeDisplay == EGL_DEFAULT_DISPLAY)
+      return _EGL_INVALID_PLATFORM;
+
+#ifdef HAVE_FBDEV_PLATFORM
+   /* fbdev is the only platform that can be a file descriptor. */
+   if (fstat((intptr_t) nativeDisplay, &buf) == 0 && S_ISCHR(buf.st_mode))
+      return _EGL_PLATFORM_FBDEV;
+#endif
+
+   if (_eglPointerIsDereferencable(nativeDisplay)) {
+      void *first_pointer = *(void **) nativeDisplay;
+
+#ifdef HAVE_WAYLAND_PLATFORM
+      /* wl_display is a wl_proxy, which is a wl_object.
+       * wl_object's first element points to the interfacetype. */
+      if (first_pointer == &wl_display_interface)
+         return _EGL_PLATFORM_WAYLAND;
+#endif
+
+#ifdef HAVE_DRM_PLATFORM
+      /* gbm has a pointer to its constructor as first element. */
+      if (first_pointer == gbm_create_device)
+         return _EGL_PLATFORM_DRM;
+#endif
+
+#ifdef HAVE_X11_PLATFORM
+      /* If not matched to any other platform, fallback to x11. */
+      return _EGL_PLATFORM_X11;
+#endif
+   }
+
+   return _EGL_INVALID_PLATFORM;
+}
+
+
 /**
  * Return the native platform.  It is the platform of the EGL native types.
  */
 _EGLPlatformType
-_eglGetNativePlatform(void)
+_eglGetNativePlatform(EGLNativeDisplayType nativeDisplay)
 {
    static _EGLPlatformType native_platform = _EGL_INVALID_PLATFORM;
 
    if (native_platform == _EGL_INVALID_PLATFORM) {
       native_platform = _eglGetNativePlatformFromEnv();
-      if (native_platform == _EGL_INVALID_PLATFORM)
-         native_platform = _EGL_NATIVE_PLATFORM;
+      if (native_platform == _EGL_INVALID_PLATFORM) {
+         native_platform = _eglNativePlatformDetectNativeDisplay(nativeDisplay);
+         if (native_platform == _EGL_INVALID_PLATFORM)
+            native_platform = _EGL_NATIVE_PLATFORM;
+      }
    }
 
    return native_platform;
diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h
index 9cd4dbfcc8a..05335ada63c 100644
--- a/src/egl/main/egldisplay.h
+++ b/src/egl/main/egldisplay.h
@@ -150,7 +150,7 @@ struct _egl_display
 
 
 extern _EGLPlatformType
-_eglGetNativePlatform(void);
+_eglGetNativePlatform(EGLNativeDisplayType nativeDisplay);
 
 
 extern void

From 2e71c7d4ffa439dace639bd9c66174544dcd02d7 Mon Sep 17 00:00:00 2001
From: Benjamin Franzke <benjaminfranzke@googlemail.com>
Date: Tue, 9 Aug 2011 16:16:33 +0200
Subject: [PATCH 293/600] egl: Log (debug) native platform type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/egl/main/egldisplay.c | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/src/egl/main/egldisplay.c b/src/egl/main/egldisplay.c
index 2849dd96c64..512ad503f35 100644
--- a/src/egl/main/egldisplay.c
+++ b/src/egl/main/egldisplay.c
@@ -61,23 +61,27 @@
 #endif
 
 
+/**
+ * Map --with-egl-platforms names to platform types.
+ */
+static const struct {
+   _EGLPlatformType platform;
+   const char *name;
+} egl_platforms[_EGL_NUM_PLATFORMS] = {
+   { _EGL_PLATFORM_WINDOWS, "gdi" },
+   { _EGL_PLATFORM_X11, "x11" },
+   { _EGL_PLATFORM_WAYLAND, "wayland" },
+   { _EGL_PLATFORM_DRM, "drm" },
+   { _EGL_PLATFORM_FBDEV, "fbdev" }
+};
+
+
 /**
  * Return the native platform by parsing EGL_PLATFORM.
  */
 static _EGLPlatformType
 _eglGetNativePlatformFromEnv(void)
 {
-   /* map --with-egl-platforms names to platform types */
-   static const struct {
-      _EGLPlatformType platform;
-      const char *name;
-   } egl_platforms[_EGL_NUM_PLATFORMS] = {
-      { _EGL_PLATFORM_WINDOWS, "gdi" },
-      { _EGL_PLATFORM_X11, "x11" },
-      { _EGL_PLATFORM_WAYLAND, "wayland" },
-      { _EGL_PLATFORM_DRM, "drm" },
-      { _EGL_PLATFORM_FBDEV, "fbdev" }
-   };
    _EGLPlatformType plat = _EGL_INVALID_PLATFORM;
    const char *plat_name;
    EGLint i;
@@ -181,16 +185,25 @@ _EGLPlatformType
 _eglGetNativePlatform(EGLNativeDisplayType nativeDisplay)
 {
    static _EGLPlatformType native_platform = _EGL_INVALID_PLATFORM;
+   char *detection_method = NULL;
 
    if (native_platform == _EGL_INVALID_PLATFORM) {
       native_platform = _eglGetNativePlatformFromEnv();
+      detection_method = "environment overwrite";
       if (native_platform == _EGL_INVALID_PLATFORM) {
          native_platform = _eglNativePlatformDetectNativeDisplay(nativeDisplay);
-         if (native_platform == _EGL_INVALID_PLATFORM)
+         detection_method = "autodetected";
+         if (native_platform == _EGL_INVALID_PLATFORM) {
             native_platform = _EGL_NATIVE_PLATFORM;
+            detection_method = "build-time configuration";
+         }
       }
    }
 
+   if (detection_method != NULL)
+      _eglLog(_EGL_DEBUG, "Native platform type: %s (%s)",
+              egl_platforms[native_platform].name, detection_method);
+
    return native_platform;
 }
 

From 5c843785417225ba582878a15a5ae13b7b961aaa Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 1 Aug 2011 13:28:11 -0700
Subject: [PATCH 294/600] glsl: Make move_block_to_cond_assign not care which
 branch it's processing

This will make some future changes a bit easier to digest.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/glsl/lower_if_to_cond_assign.cpp | 44 +++++++++++-----------------
 1 file changed, 17 insertions(+), 27 deletions(-)

diff --git a/src/glsl/lower_if_to_cond_assign.cpp b/src/glsl/lower_if_to_cond_assign.cpp
index b637eb4fe1d..2d447e043a5 100644
--- a/src/glsl/lower_if_to_cond_assign.cpp
+++ b/src/glsl/lower_if_to_cond_assign.cpp
@@ -94,40 +94,23 @@ check_control_flow(ir_instruction *ir, void *data)
 
 void
 move_block_to_cond_assign(void *mem_ctx,
-			  ir_if *if_ir, ir_variable *cond_var, bool then)
+			  ir_if *if_ir, ir_rvalue *cond_expr,
+			  exec_list *instructions)
 {
-   exec_list *instructions;
-
-   if (then) {
-      instructions = &if_ir->then_instructions;
-   } else {
-      instructions = &if_ir->else_instructions;
-   }
-
    foreach_iter(exec_list_iterator, iter, *instructions) {
       ir_instruction *ir = (ir_instruction *)iter.get();
 
       if (ir->ir_type == ir_type_assignment) {
 	 ir_assignment *assign = (ir_assignment *)ir;
-	 ir_rvalue *cond_expr;
-	 ir_dereference *deref = new(mem_ctx) ir_dereference_variable(cond_var);
-
-	 if (then) {
-	    cond_expr = deref;
-	 } else {
-	    cond_expr = new(mem_ctx) ir_expression(ir_unop_logic_not,
-						   glsl_type::bool_type,
-						   deref,
-						   NULL);
-	 }
 
 	 if (!assign->condition) {
-	    assign->condition = cond_expr;
+	    assign->condition = cond_expr->clone(mem_ctx, NULL);
 	 } else {
-	    assign->condition = new(mem_ctx) ir_expression(ir_binop_logic_and,
-							   glsl_type::bool_type,
-							   cond_expr,
-							   assign->condition);
+	    assign->condition =
+	       new(mem_ctx) ir_expression(ir_binop_logic_and,
+					  glsl_type::bool_type,
+					  cond_expr->clone(mem_ctx, NULL),
+					  assign->condition);
 	 }
       }
 
@@ -187,8 +170,15 @@ ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir)
    /* Now, move all of the instructions out of the if blocks, putting
     * conditions on assignments.
     */
-   move_block_to_cond_assign(mem_ctx, ir, cond_var, true);
-   move_block_to_cond_assign(mem_ctx, ir, cond_var, false);
+   move_block_to_cond_assign(mem_ctx, ir, deref,
+			     &ir->then_instructions);
+
+   ir_rvalue *inverse =
+      new(mem_ctx) ir_expression(ir_unop_logic_not,
+				 glsl_type::bool_type,
+				 deref->clone(mem_ctx, NULL),
+				 NULL);
+   move_block_to_cond_assign(mem_ctx, ir, inverse, &ir->else_instructions);
 
    ir->remove();
 

From 13df36ecb6c24ea36534fa0dfea917aca8233710 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 1 Aug 2011 13:36:12 -0700
Subject: [PATCH 295/600] glsl: Replace foreach_iter with foreach_list_safe

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/glsl/lower_if_to_cond_assign.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/glsl/lower_if_to_cond_assign.cpp b/src/glsl/lower_if_to_cond_assign.cpp
index 2d447e043a5..5c74bc1816c 100644
--- a/src/glsl/lower_if_to_cond_assign.cpp
+++ b/src/glsl/lower_if_to_cond_assign.cpp
@@ -97,8 +97,8 @@ move_block_to_cond_assign(void *mem_ctx,
 			  ir_if *if_ir, ir_rvalue *cond_expr,
 			  exec_list *instructions)
 {
-   foreach_iter(exec_list_iterator, iter, *instructions) {
-      ir_instruction *ir = (ir_instruction *)iter.get();
+   foreach_list_safe(node, instructions) {
+      ir_instruction *ir = (ir_instruction *) node;
 
       if (ir->ir_type == ir_type_assignment) {
 	 ir_assignment *assign = (ir_assignment *)ir;

From 4a026d6ba50034c326eb6cfd0b555f57d83ab609 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 1 Aug 2011 13:55:46 -0700
Subject: [PATCH 296/600] glsl: Slight change to the code generated by
 if-flattening

Now the condition (for the then-clause) and the inverse condition (for
the else-clause) get written to separate temporary variables.  In the
presence of complex conditions, this shouldn't result in more code
being generated.  If the original if-statement was

    if (a && b && c && d && e) {
        ...
    } else {
        ...
    }

The lowered code will be

   if_to_cond_assign_then = a && b && c && d && e;
   ...
   if_to_cond_assign_else = !if_to_cond_assign_then;
   ...

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/glsl/lower_if_to_cond_assign.cpp | 58 ++++++++++++++++++----------
 1 file changed, 38 insertions(+), 20 deletions(-)

diff --git a/src/glsl/lower_if_to_cond_assign.cpp b/src/glsl/lower_if_to_cond_assign.cpp
index 5c74bc1816c..a7097556c14 100644
--- a/src/glsl/lower_if_to_cond_assign.cpp
+++ b/src/glsl/lower_if_to_cond_assign.cpp
@@ -136,7 +136,6 @@ ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir)
       return visit_continue;
 
    bool found_control_flow = false;
-   ir_variable *cond_var;
    ir_assignment *assign;
    ir_dereference_variable *deref;
 
@@ -154,31 +153,50 @@ ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir)
 
    void *mem_ctx = ralloc_parent(ir);
 
-   /* Store the condition to a variable so the assignment conditions are
-    * simpler.
+   /* Store the condition to a variable.  Move all of the instructions from
+    * the then-clause of the if-statement.  Use the condition variable as a
+    * condition for all assignments.
     */
-   cond_var = new(mem_ctx) ir_variable(glsl_type::bool_type,
-				       "if_to_cond_assign_condition",
-				       ir_var_temporary);
-   ir->insert_before(cond_var);
+   ir_variable *const then_var =
+      new(mem_ctx) ir_variable(glsl_type::bool_type,
+			       "if_to_cond_assign_then",
+			       ir_var_temporary);
+   ir->insert_before(then_var);
 
-   deref = new(mem_ctx) ir_dereference_variable(cond_var);
-   assign = new(mem_ctx) ir_assignment(deref,
-				       ir->condition, NULL);
+   ir_dereference_variable *then_cond =
+      new(mem_ctx) ir_dereference_variable(then_var);
+
+   assign = new(mem_ctx) ir_assignment(then_cond, ir->condition);
    ir->insert_before(assign);
 
-   /* Now, move all of the instructions out of the if blocks, putting
-    * conditions on assignments.
-    */
-   move_block_to_cond_assign(mem_ctx, ir, deref,
+   move_block_to_cond_assign(mem_ctx, ir, then_cond,
 			     &ir->then_instructions);
 
-   ir_rvalue *inverse =
-      new(mem_ctx) ir_expression(ir_unop_logic_not,
-				 glsl_type::bool_type,
-				 deref->clone(mem_ctx, NULL),
-				 NULL);
-   move_block_to_cond_assign(mem_ctx, ir, inverse, &ir->else_instructions);
+   /* If there are instructions in the else-clause, store the inverse of the
+    * condition to a variable.  Move all of the instructions from the
+    * else-clause if the if-statement.  Use the (inverse) condition variable
+    * as a condition for all assignments.
+    */
+   if (!ir->else_instructions.is_empty()) {
+      ir_variable *const else_var =
+	 new(mem_ctx) ir_variable(glsl_type::bool_type,
+				  "if_to_cond_assign_else",
+				  ir_var_temporary);
+      ir->insert_before(else_var);
+
+      ir_dereference_variable *else_cond =
+	 new(mem_ctx) ir_dereference_variable(else_var);
+
+      ir_rvalue *inverse =
+	 new(mem_ctx) ir_expression(ir_unop_logic_not,
+				    then_cond->clone(mem_ctx, NULL));
+
+      assign = new(mem_ctx) ir_assignment(else_cond, inverse);
+      ir->insert_before(assign);
+
+      move_block_to_cond_assign(mem_ctx, ir, else_cond,
+				&ir->else_instructions);
+   }
 
    ir->remove();
 

From a352e2d08e0a141298275e77f25541218a97afb7 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 1 Aug 2011 14:13:10 -0700
Subject: [PATCH 297/600] glsl: Modify strategy for accumulating conditions
 when lowering if-statements

Previously if-statements were lowered from inner-most to outer-most
(i.e., bottom-up).  All assignments within an if-statement would have
the condition of the if-statement appended to its existing condition.
As a result the assignments from a deeply nested if-statement would
have a very long and complex condition.

Several shaders in the OpenGL ES2 conformance test suite contain
non-constant array indexing that has been lowered by the shader
writer.  These tests usually look something like:

    if (i == 0) {
        value = array[0];
    } else if (i == 1) {
        value = array[1];
    } else ...

The IR for the last assignment ends up as:

    (assign (expression bool && (expression bool ! (var_ref if_to_cond_assign_condition) ) (expression bool && (expression bool ! (var_ref if_to_cond_assign_condition@20) ) (expression bool && (expression bool ! (var_ref if_to_cond_assign_condition@22) ) (expression bool && (expression bool ! (var_ref if_to_cond_assign_condition@24) ) (var_ref if_to_cond_assign_condition@26) ) ) ) )  (x) (var_ref value) (array_ref (var_ref array) (constant int (5)))

The Mesa IR that is generated from this is just as awesome as you
might expect.

Three changes are made to the way if-statements are lowered.

1. Two condition variables, if_to_cond_assign_then and
if_to_cond_assign_else, are created for each if-then-else structure.
The former contains the "positive" condition, and the later contains
the "negative" condtion.  This change was implemented in the previous
patch.

2. Each condition variable is added to a hash-table when it is created.

3. When lowering an if-statement, assignments to existing condtion
variables get the current condition anded.  This ensures that nested
condition variables are only set to true when the condition variable
for all outer if-statements is also true.

Changes #1 and #3 combine to ensure the correctness of the resulting
code.

4. When a condition assignment is encountered with a condition that is
a dereference of a previously added condition variable, the condition
is not modified.

Change #4 prevents the continuous accumulation of conditions on
assignments.

If the original if-statements were:

    if (x) {
        if (a && b && c && d && e) {
            ...
        } else {
            ...
        }
    } else {
        if (g && h && i && j && k) {
            ...
        } else {
            ...
        }
    }

The lowered code will be

    if_to_cond_assign_then@1 = x;
    if_to_cond_assign_then@2 = a && b && c && d && e
        && if_to_cond_assign_then@1;
    ...
    if_to_cond_assign_else@2 = !if_to_cond_assign_then
        && if_to_cond_assign_then@1;
    ...

    if_to_cond_assign_else@1 = !if_to_cond_assign_then@1;
    if_to_cond_assign_then@3 = g && h && i && j;
        && if_to_cond_assign_else@1;
    ...
    if_to_cond_assign_else@3 = !if_to_cond_assign_then
        && if_to_cond_assign_else@1;
    ...

Depending on how instructions are emitted, there may be an extra
instruction due to the duplication of the '&&
if_to_cond_assign_{then,else}@1' on the nested else conditions.  In
addition, this may cause some unnecessary register pressure since in
the simple case (where the nested conditions are not complex) the
nested then-condition variables are live longer than strictly
necessary.

Before this change, one of the shaders in the OpenGL ES2 conformance
test suite's acos_float_frag_xvary generated 348 Mesa IR instructions.
After this change it only generates 124.  Many, but not all, of these
instructions would have also been eliminated by CSE.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/glsl/lower_if_to_cond_assign.cpp | 66 +++++++++++++++++++++++-----
 1 file changed, 55 insertions(+), 11 deletions(-)

diff --git a/src/glsl/lower_if_to_cond_assign.cpp b/src/glsl/lower_if_to_cond_assign.cpp
index a7097556c14..94b24db1b29 100644
--- a/src/glsl/lower_if_to_cond_assign.cpp
+++ b/src/glsl/lower_if_to_cond_assign.cpp
@@ -47,6 +47,7 @@
 
 #include "glsl_types.h"
 #include "ir.h"
+#include "program/hash_table.h"
 
 class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor {
 public:
@@ -55,6 +56,14 @@ public:
       this->progress = false;
       this->max_depth = max_depth;
       this->depth = 0;
+
+      this->condition_variables = hash_table_ctor(0, hash_table_pointer_hash,
+						  hash_table_pointer_compare);
+   }
+
+   ~ir_if_to_cond_assign_visitor()
+   {
+      hash_table_dtor(this->condition_variables);
    }
 
    ir_visitor_status visit_enter(ir_if *);
@@ -63,6 +72,8 @@ public:
    bool progress;
    unsigned max_depth;
    unsigned depth;
+
+   struct hash_table *condition_variables;
 };
 
 bool
@@ -95,7 +106,8 @@ check_control_flow(ir_instruction *ir, void *data)
 void
 move_block_to_cond_assign(void *mem_ctx,
 			  ir_if *if_ir, ir_rvalue *cond_expr,
-			  exec_list *instructions)
+			  exec_list *instructions,
+			  struct hash_table *ht)
 {
    foreach_list_safe(node, instructions) {
       ir_instruction *ir = (ir_instruction *) node;
@@ -103,14 +115,33 @@ move_block_to_cond_assign(void *mem_ctx,
       if (ir->ir_type == ir_type_assignment) {
 	 ir_assignment *assign = (ir_assignment *)ir;
 
-	 if (!assign->condition) {
-	    assign->condition = cond_expr->clone(mem_ctx, NULL);
-	 } else {
-	    assign->condition =
-	       new(mem_ctx) ir_expression(ir_binop_logic_and,
-					  glsl_type::bool_type,
-					  cond_expr->clone(mem_ctx, NULL),
-					  assign->condition);
+	 if (hash_table_find(ht, assign) == NULL) {
+	    hash_table_insert(ht, assign, assign);
+
+	    /* If the LHS of the assignment is a condition variable that was
+	     * previously added, insert an additional assignment of false to
+	     * the variable.
+	     */
+	    const bool assign_to_cv =
+	       hash_table_find(ht, assign->lhs->variable_referenced()) != NULL;
+
+	    if (!assign->condition) {
+	       if (assign_to_cv) {
+		  assign->rhs =
+		     new(mem_ctx) ir_expression(ir_binop_logic_and,
+						glsl_type::bool_type,
+						cond_expr->clone(mem_ctx, NULL),
+						assign->rhs);
+	       } else {
+		  assign->condition = cond_expr->clone(mem_ctx, NULL);
+	       }
+	    } else {
+	       assign->condition =
+		  new(mem_ctx) ir_expression(ir_binop_logic_and,
+					     glsl_type::bool_type,
+					     cond_expr->clone(mem_ctx, NULL),
+					     assign->condition);
+	    }
 	 }
       }
 
@@ -125,6 +156,7 @@ ir_if_to_cond_assign_visitor::visit_enter(ir_if *ir)
 {
    (void) ir;
    this->depth++;
+
    return visit_continue;
 }
 
@@ -170,7 +202,13 @@ ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir)
    ir->insert_before(assign);
 
    move_block_to_cond_assign(mem_ctx, ir, then_cond,
-			     &ir->then_instructions);
+			     &ir->then_instructions,
+			     this->condition_variables);
+
+   /* Add the new condition variable to the hash table.  This allows us to
+    * find this variable when lowering other (enclosing) if-statements.
+    */
+   hash_table_insert(this->condition_variables, then_var, then_var);
 
    /* If there are instructions in the else-clause, store the inverse of the
     * condition to a variable.  Move all of the instructions from the
@@ -195,7 +233,13 @@ ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir)
       ir->insert_before(assign);
 
       move_block_to_cond_assign(mem_ctx, ir, else_cond,
-				&ir->else_instructions);
+				&ir->else_instructions,
+				this->condition_variables);
+
+      /* Add the new condition variable to the hash table.  This allows us to
+       * find this variable when lowering other (enclosing) if-statements.
+       */
+      hash_table_insert(this->condition_variables, else_var, else_var);
    }
 
    ir->remove();

From 63720114b4234f5522eb8dee8f4b0c0db561a8c3 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 5 Aug 2011 16:39:56 -0700
Subject: [PATCH 298/600] glw: Remove GLw source.

libGLw is an old OpenGL widget library with optional Motif support.
It almost never changes and very few people actually still care about
it, so we've decided to ship it separately.

The new home for libGLw is: git://git.freedesktop.org/mesa/glw/

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 configs/darwin         |   1 -
 configs/default        |   2 +-
 configs/freebsd-dri    |   2 +-
 configs/linux-cell     |   1 -
 configs/linux-dri-xcb  |   2 +-
 configs/linux-indirect |   2 +-
 configure.ac           |  76 -----
 src/glw/GLwDrawA.c     | 684 -----------------------------------------
 src/glw/GLwDrawA.h     | 195 ------------
 src/glw/GLwDrawAP.h    | 130 --------
 src/glw/GLwMDrawA.c    |  41 ---
 src/glw/GLwMDrawA.h    |  41 ---
 src/glw/GLwMDrawAP.h   |  41 ---
 src/glw/Makefile       |  74 -----
 src/glw/README         |  56 ----
 src/glw/glw.pc.in      |  13 -
 16 files changed, 4 insertions(+), 1357 deletions(-)
 delete mode 100644 src/glw/GLwDrawA.c
 delete mode 100644 src/glw/GLwDrawA.h
 delete mode 100644 src/glw/GLwDrawAP.h
 delete mode 100644 src/glw/GLwMDrawA.c
 delete mode 100644 src/glw/GLwMDrawA.h
 delete mode 100644 src/glw/GLwMDrawAP.h
 delete mode 100644 src/glw/Makefile
 delete mode 100644 src/glw/README
 delete mode 100644 src/glw/glw.pc.in

diff --git a/configs/darwin b/configs/darwin
index 41e7ba800bc..83f417ce0fd 100644
--- a/configs/darwin
+++ b/configs/darwin
@@ -50,7 +50,6 @@ GLU_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GL_LIB)
 GLW_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GL_LIB) -L$(INSTALL_DIR)/$(LIB_DIR) -L$(X11_DIR)/$(LIB_DIR) -lX11 -lXt
 APP_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) -L$(INSTALL_DIR)/$(LIB_DIR) -L$(X11_DIR)/$(LIB_DIR) -lX11 -lXmu -lXt -lXi -lm
 
-# omit glw lib for now:
 SRC_DIRS = glsl mapi/glapi mapi/vgapi glx/apple mesa gallium glu
 GLU_DIRS = sgi
 DRIVER_DIRS = osmesa
diff --git a/configs/default b/configs/default
index e839a1e500a..078c85e8b4d 100644
--- a/configs/default
+++ b/configs/default
@@ -105,7 +105,7 @@ MOTIF_CFLAGS = -I/usr/include/Motif1.2
 # Directories to build
 LIB_DIR = lib
 SRC_DIRS = glsl mapi/glapi mapi/vgapi mesa \
-	gallium egl gallium/winsys gallium/targets glu glw
+	gallium egl gallium/winsys gallium/targets glu
 GLU_DIRS = sgi
 DRIVER_DIRS = x11 osmesa
 
diff --git a/configs/freebsd-dri b/configs/freebsd-dri
index fdf4b293acd..3c83872c08c 100644
--- a/configs/freebsd-dri
+++ b/configs/freebsd-dri
@@ -42,7 +42,7 @@ GLW_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -L/usr/local/lib -lGL -lXt -lX11
 
 
 # Directories
-SRC_DIRS = glx gallium mesa glu glw
+SRC_DIRS = glx gallium mesa glu
 DRIVER_DIRS = dri
 
 DRM_SOURCE_PATH=$(TOP)/../drm
diff --git a/configs/linux-cell b/configs/linux-cell
index e87e69a8065..7f38da971d1 100644
--- a/configs/linux-cell
+++ b/configs/linux-cell
@@ -36,7 +36,6 @@ CFLAGS = $(COMMON_C_CPP_FLAGS) -Wmissing-prototypes -std=c99
 CXXFLAGS = $(COMMON_C_CPP_FLAGS)
 
 
-# Omitting glw here:
 SRC_DIRS = glsl mapi/glapi mapi/vgapi mesa \
 	gallium gallium/winsys gallium/targets glu
 
diff --git a/configs/linux-dri-xcb b/configs/linux-dri-xcb
index 021aa3e8b20..15702da1904 100644
--- a/configs/linux-dri-xcb
+++ b/configs/linux-dri-xcb
@@ -49,7 +49,7 @@ DRI_LIB_DEPS  = $(MESA_MODULES) $(EXTRA_LIB_PATH) -lm -lpthread -lexpat -ldl $(L
 GL_LIB_DEPS   = $(EXTRA_LIB_PATH) -lX11 -lXext -lXxf86vm -lm -lpthread -ldl \
                 $(LIBDRM_LIB) $(shell pkg-config --libs xcb) $(shell pkg-config --libs x11-xcb) $(shell pkg-config --libs xcb-glx)
 
-SRC_DIRS = glx gallium mesa glu glw
+SRC_DIRS = glx gallium mesa glu
 
 DRIVER_DIRS = dri
 DRI_DIRS = i810 i915 mach64 mga r128 r200 r300 radeon \
diff --git a/configs/linux-indirect b/configs/linux-indirect
index 82868c4a134..5592a8f2978 100644
--- a/configs/linux-indirect
+++ b/configs/linux-indirect
@@ -48,5 +48,5 @@ GL_LIB_DEPS   = $(EXTRA_LIB_PATH) -lX11 -lXext -lXxf86vm -lm -lpthread -ldl
 
 
 # Directories
-SRC_DIRS = glx glu glw
+SRC_DIRS = glx glu
 DRIVER_DIRS =
diff --git a/configure.ac b/configure.ac
index 9195da9b3a3..6fa5e5177a4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -359,7 +359,6 @@ fi
 GL_LIB_NAME='lib$(GL_LIB).'${LIB_EXTENSION}
 GLU_LIB_NAME='lib$(GLU_LIB).'${LIB_EXTENSION}
 GLUT_LIB_NAME='lib$(GLUT_LIB).'${LIB_EXTENSION}
-GLW_LIB_NAME='lib$(GLW_LIB).'${LIB_EXTENSION}
 OSMESA_LIB_NAME='lib$(OSMESA_LIB).'${LIB_EXTENSION}
 EGL_LIB_NAME='lib$(EGL_LIB).'${LIB_EXTENSION}
 GLESv1_CM_LIB_NAME='lib$(GLESv1_CM_LIB).'${LIB_EXTENSION}
@@ -372,7 +371,6 @@ GBM_LIB_NAME='lib$(GBM_LIB).'${LIB_EXTENSION}
 GL_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GL_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*'
 GLU_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GLU_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*'
 GLUT_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GLUT_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*'
-GLW_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GLW_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*'
 OSMESA_LIB_GLOB=${LIB_PREFIX_GLOB}'$(OSMESA_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*'
 EGL_LIB_GLOB=${LIB_PREFIX_GLOB}'$(EGL_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*'
 EGL_LIB_GLOB=${LIB_PREFIX_GLOB}'$(EGL_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTENSION}'*'
@@ -386,7 +384,6 @@ GBM_LIB_GLOB=${LIB_PREFIX_GLOB}'$(GBM_LIB)'${LIB_VERSION_SEPARATOR}'*'${LIB_EXTE
 AC_SUBST([GL_LIB_NAME])
 AC_SUBST([GLU_LIB_NAME])
 AC_SUBST([GLUT_LIB_NAME])
-AC_SUBST([GLW_LIB_NAME])
 AC_SUBST([OSMESA_LIB_NAME])
 AC_SUBST([EGL_LIB_NAME])
 AC_SUBST([GLESv1_CM_LIB_NAME])
@@ -399,7 +396,6 @@ AC_SUBST([GBM_LIB_NAME])
 AC_SUBST([GL_LIB_GLOB])
 AC_SUBST([GLU_LIB_GLOB])
 AC_SUBST([GLUT_LIB_GLOB])
-AC_SUBST([GLW_LIB_GLOB])
 AC_SUBST([OSMESA_LIB_GLOB])
 AC_SUBST([EGL_LIB_GLOB])
 AC_SUBST([GLESv1_CM_LIB_GLOB])
@@ -1604,77 +1600,6 @@ AC_SUBST([GLU_PC_REQ_PRIV])
 AC_SUBST([GLU_PC_LIB_PRIV])
 AC_SUBST([GLU_PC_CFLAGS])
 
-dnl
-dnl GLw configuration
-dnl
-AC_ARG_ENABLE([glw],
-    [AS_HELP_STRING([--disable-glw],
-        [enable Xt/Motif widget library @<:@default=enabled@:>@])],
-    [enable_glw="$enableval"],
-    [enable_glw=yes])
-dnl Don't build GLw on osmesa
-if test "x$enable_glw" = xyes -a "x$enable_glx" = xno; then
-    AC_MSG_NOTICE([Disabling GLw since there is no OpenGL driver])
-    enable_glw=no
-fi
-AC_ARG_ENABLE([motif],
-    [AS_HELP_STRING([--enable-motif],
-        [use Motif widgets in GLw @<:@default=disabled@:>@])],
-    [enable_motif="$enableval"],
-    [enable_motif=no])
-
-if test "x$enable_glw" = xyes; then
-    SRC_DIRS="$SRC_DIRS glw"
-    if test "$x11_pkgconfig" = yes; then
-        PKG_CHECK_MODULES([GLW],[x11 xt])
-        GLW_PC_REQ_PRIV="x11 xt"
-        GLW_LIB_DEPS="$GLW_LIBS"
-    else
-        # should check these...
-        GLW_LIB_DEPS="$X_LIBS -lXt -lX11"
-        GLW_PC_LIB_PRIV="$GLW_LIB_DEPS"
-        GLW_PC_CFLAGS="$X11_INCLUDES"
-    fi
-
-    GLW_SOURCES="GLwDrawA.c"
-    MOTIF_CFLAGS=
-    if test "x$enable_motif" = xyes; then
-        GLW_SOURCES="$GLW_SOURCES GLwMDrawA.c"
-        AC_PATH_PROG([MOTIF_CONFIG], [motif-config], [no])
-        if test "x$MOTIF_CONFIG" != xno; then
-            MOTIF_CFLAGS=`$MOTIF_CONFIG --cflags`
-            MOTIF_LIBS=`$MOTIF_CONFIG --libs`
-        else
-            AC_CHECK_HEADER([Xm/PrimitiveP.h], [],
-                [AC_MSG_ERROR([Can't locate Motif headers])])
-            AC_CHECK_LIB([Xm], [XmGetPixmap], [MOTIF_LIBS="-lXm"],
-                [AC_MSG_ERROR([Can't locate Motif Xm library])])
-        fi
-        # MOTIF_LIBS is prepended to GLW_LIB_DEPS since Xm needs Xt/X11
-        GLW_LIB_DEPS="$MOTIF_LIBS $GLW_LIB_DEPS"
-        GLW_PC_LIB_PRIV="$MOTIF_LIBS $GLW_PC_LIB_PRIV"
-        GLW_PC_CFLAGS="$MOTIF_CFLAGS $GLW_PC_CFLAGS"
-    fi
-
-    # If static, empty GLW_LIB_DEPS and add libs for programs to link
-    GLW_PC_LIB_PRIV="$GLW_PC_LIB_PRIV"
-    if test "$enable_static" = no; then
-        GLW_MESA_DEPS='-l$(GL_LIB)'
-        GLW_LIB_DEPS="$GLW_LIB_DEPS"
-    else
-        APP_LIB_DEPS="$APP_LIB_DEPS $GLW_LIB_DEPS"
-        GLW_LIB_DEPS=""
-        GLW_MESA_DEPS=""
-    fi
-fi
-AC_SUBST([GLW_LIB_DEPS])
-AC_SUBST([GLW_MESA_DEPS])
-AC_SUBST([GLW_SOURCES])
-AC_SUBST([MOTIF_CFLAGS])
-AC_SUBST([GLW_PC_REQ_PRIV])
-AC_SUBST([GLW_PC_LIB_PRIV])
-AC_SUBST([GLW_PC_CFLAGS])
-
 dnl
 dnl Program library dependencies
 dnl    Only libm is added here if necessary as the libraries should
@@ -2053,7 +1978,6 @@ esac
 
 echo ""
 echo "        GLU:             $enable_glu"
-echo "        GLw:             $enable_glw (Motif: $enable_motif)"
 
 dnl EGL
 echo ""
diff --git a/src/glw/GLwDrawA.c b/src/glw/GLwDrawA.c
deleted file mode 100644
index 30304a40801..00000000000
--- a/src/glw/GLwDrawA.c
+++ /dev/null
@@ -1,684 +0,0 @@
-/*
- * (c) Copyright 1993, Silicon Graphics, Inc.
- * ALL RIGHTS RESERVED 
- * Permission to use, copy, modify, and distribute this software for 
- * any purpose and without fee is hereby granted, provided that the above
- * copyright notice appear in all copies and that both the copyright notice
- * and this permission notice appear in supporting documentation, and that 
- * the name of Silicon Graphics, Inc. not be used in advertising
- * or publicity pertaining to distribution of the software without specific,
- * written prior permission. 
- *
- * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS"
- * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR
- * FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT SHALL SILICON
- * GRAPHICS, INC.  BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT,
- * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY
- * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION,
- * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF
- * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC.  HAS BEEN
- * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
- * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE.
- * 
- * 
- * US Government Users Restricted Rights 
- * Use, duplication, or disclosure by the Government is subject to
- * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph
- * (c)(1)(ii) of the Rights in Technical Data and Computer Software
- * clause at DFARS 252.227-7013 and/or in similar or successor
- * clauses in the FAR or the DOD or NASA FAR Supplement.
- * Unpublished-- rights reserved under the copyright laws of the
- * United States.  Contractor/manufacturer is Silicon Graphics,
- * Inc., 2011 N.  Shoreline Blvd., Mountain View, CA 94039-7311.
- *
- * OpenGL(TM) is a trademark of Silicon Graphics, Inc.
- */
-
-/*
- *
- * This file has been slightly modified from the original for use with Mesa
- *
- *     Jeroen van der Zijp
- *
- *     jvz@cyberia.cfdrc.com
- *
- */
-#include <X11/IntrinsicP.h>
-#include <X11/StringDefs.h>
-#include <GL/glx.h>
-#include <GL/gl.h>
-#ifdef __GLX_MOTIF
-#include <Xm/PrimitiveP.h>
-#include "GLwMDrawAP.h"
-#else 
-#include "GLwDrawAP.h"
-#endif 
-#include <assert.h>
-#include <stdio.h>
-
-#ifdef __GLX_MOTIF
-#define GLwDrawingAreaWidget             GLwMDrawingAreaWidget
-#define GLwDrawingAreaClassRec           GLwMDrawingAreaClassRec
-#define glwDrawingAreaClassRec           glwMDrawingAreaClassRec
-#define glwDrawingAreaWidgetClass        glwMDrawingAreaWidgetClass
-#define GLwDrawingAreaRec                GLwMDrawingAreaRec
-#endif 
-
-#define ATTRIBLIST_SIZE 32
-
-#define offset(field) XtOffset(GLwDrawingAreaWidget,glwDrawingArea.field)
-
-
-/* forward definitions */
-static void createColormap(GLwDrawingAreaWidget w,int offset,XrmValue *value);
-static void Initialize(GLwDrawingAreaWidget req,GLwDrawingAreaWidget neww,ArgList args,Cardinal *num_args);
-static void Realize(Widget w,Mask *valueMask,XSetWindowAttributes *attributes);
-static void Redraw(GLwDrawingAreaWidget w,XEvent *event,Region region);
-static void Resize(GLwDrawingAreaWidget glw);
-static void Destroy(GLwDrawingAreaWidget glw);
-static void glwInput(GLwDrawingAreaWidget glw,XEvent *event,String *params,Cardinal *numParams);
-
-
-
-static char defaultTranslations[] =
-#ifdef __GLX_MOTIF
-     "<Key>osfHelp:PrimitiveHelp() \n"
-#endif
-    "<KeyDown>:   glwInput() \n\
-     <KeyUp>:     glwInput() \n\
-     <BtnDown>:   glwInput() \n\
-     <BtnUp>:     glwInput() \n\
-     <BtnMotion>: glwInput() ";
-
-
-static XtActionsRec actions[] = {
-  {"glwInput",(XtActionProc)glwInput},                /* key or mouse input */
-  };
-
-
-/*
- * There is a bit of unusual handling of the resources here.
- * Because Xt insists on allocating the colormap resource when it is
- * processing the core resources (even if we redeclare the colormap
- * resource here, we need to do a little trick.  When Xt first allocates
- * the colormap, we allow it to allocate the default one, since we have
- * not yet determined the appropriate visual (which is determined from
- * resources parsed after the colormap).  We also let it allocate colors
- * in that default colormap.
- *
- * In the initialize proc we calculate the actual visual.  Then, we
- * reobtain the colormap resource using XtGetApplicationResources in
- * the initialize proc.  If requested, we also reallocate colors in
- * that colormap using the same method.
- */
-
-static XtResource resources[] = {
-  /* The GLX attributes.  Add any new attributes here */
-
-  {GLwNbufferSize, GLwCBufferSize, XtRInt, sizeof (int),
-       offset(bufferSize), XtRImmediate, (XtPointer) 0},
-  
-  {GLwNlevel, GLwCLevel, XtRInt, sizeof (int),
-       offset(level), XtRImmediate, (XtPointer) 0},
-  
-  {GLwNrgba, GLwCRgba, XtRBoolean, sizeof (Boolean),
-       offset(rgba), XtRImmediate, (XtPointer) FALSE},
-  
-  {GLwNdoublebuffer, GLwCDoublebuffer, XtRBoolean, sizeof (Boolean),
-       offset(doublebuffer), XtRImmediate, (XtPointer) FALSE},
-  
-  {GLwNstereo, GLwCStereo, XtRBoolean, sizeof (Boolean),
-       offset(stereo), XtRImmediate, (XtPointer) FALSE},
-  
-  {GLwNauxBuffers, GLwCAuxBuffers, XtRInt, sizeof (int),
-       offset(auxBuffers), XtRImmediate, (XtPointer) 0},
-  
-  {GLwNredSize, GLwCColorSize, XtRInt, sizeof (int),
-       offset(redSize), XtRImmediate, (XtPointer) 1},
-  
-  {GLwNgreenSize, GLwCColorSize, XtRInt, sizeof (int),
-       offset(greenSize), XtRImmediate, (XtPointer) 1},
-  
-  {GLwNblueSize, GLwCColorSize, XtRInt, sizeof (int),
-       offset(blueSize), XtRImmediate, (XtPointer) 1},
-  
-  {GLwNalphaSize, GLwCAlphaSize, XtRInt, sizeof (int),
-       offset(alphaSize), XtRImmediate, (XtPointer) 0},
-  
-  {GLwNdepthSize, GLwCDepthSize, XtRInt, sizeof (int),
-       offset(depthSize), XtRImmediate, (XtPointer) 0},
-  
-  {GLwNstencilSize, GLwCStencilSize, XtRInt, sizeof (int),
-       offset(stencilSize), XtRImmediate, (XtPointer) 0},
-  
-  {GLwNaccumRedSize, GLwCAccumColorSize, XtRInt, sizeof (int),
-       offset(accumRedSize), XtRImmediate, (XtPointer) 0},
-  
-  {GLwNaccumGreenSize, GLwCAccumColorSize, XtRInt, sizeof (int),
-       offset(accumGreenSize), XtRImmediate, (XtPointer) 0},
-  
-  {GLwNaccumBlueSize, GLwCAccumColorSize, XtRInt, sizeof (int),
-       offset(accumBlueSize), XtRImmediate, (XtPointer) 0},
-  
-  {GLwNaccumAlphaSize, GLwCAccumAlphaSize, XtRInt, sizeof (int),
-       offset(accumAlphaSize), XtRImmediate, (XtPointer) 0},
-  
-  /* the attribute list */
-  {GLwNattribList, GLwCAttribList, XtRPointer, sizeof(int *),
-       offset(attribList), XtRImmediate, (XtPointer) NULL},
-
-  /* the visual info */
-  {GLwNvisualInfo, GLwCVisualInfo, GLwRVisualInfo, sizeof (XVisualInfo *),
-       offset(visualInfo), XtRImmediate, (XtPointer) NULL},
-
-  /* miscellaneous resources */
-  {GLwNinstallColormap, GLwCInstallColormap, XtRBoolean, sizeof (Boolean),
-       offset(installColormap), XtRImmediate, (XtPointer) TRUE},
-
-  {GLwNallocateBackground, GLwCAllocateColors, XtRBoolean, sizeof (Boolean),
-       offset(allocateBackground), XtRImmediate, (XtPointer) FALSE},
-
-  {GLwNallocateOtherColors, GLwCAllocateColors, XtRBoolean, sizeof (Boolean),
-       offset(allocateOtherColors), XtRImmediate, (XtPointer) FALSE},
-
-  {GLwNinstallBackground, GLwCInstallBackground, XtRBoolean, sizeof (Boolean),
-       offset(installBackground), XtRImmediate, (XtPointer) TRUE},
-
-  {GLwNginitCallback, GLwCCallback, XtRCallback, sizeof (XtCallbackList),
-       offset(ginitCallback), XtRImmediate, (XtPointer) NULL},
-
-  {GLwNinputCallback, GLwCCallback, XtRCallback, sizeof (XtCallbackList),
-       offset(inputCallback), XtRImmediate, (XtPointer) NULL},
-
-  {GLwNresizeCallback, GLwCCallback, XtRCallback, sizeof (XtCallbackList),
-       offset(resizeCallback), XtRImmediate, (XtPointer) NULL},
-
-  {GLwNexposeCallback, GLwCCallback, XtRCallback, sizeof (XtCallbackList),
-       offset(exposeCallback), XtRImmediate, (XtPointer) NULL},
-
-  /* Changes to Motif primitive resources */
-#ifdef __GLX_MOTIF
-  {XmNtraversalOn, XmCTraversalOn, XmRBoolean, sizeof (Boolean),
-   XtOffset (GLwDrawingAreaWidget, primitive.traversal_on), XmRImmediate,
-   (XtPointer)FALSE},
-  
-  /* highlighting is normally disabled, as when Motif tries to disable
-   * highlighting, it tries to reset the color back to the parent's
-   * background (usually Motif blue).  Unfortunately, that is in a
-   * different colormap, and doesn't work too well.
-   */
-  {XmNhighlightOnEnter, XmCHighlightOnEnter, XmRBoolean, sizeof (Boolean),
-   XtOffset (GLwDrawingAreaWidget, primitive.highlight_on_enter),
-   XmRImmediate, (XtPointer) FALSE},
-  
-  {XmNhighlightThickness, XmCHighlightThickness, XmRHorizontalDimension,
-   sizeof (Dimension),
-   XtOffset (GLwDrawingAreaWidget, primitive.highlight_thickness),
-   XmRImmediate, (XtPointer) 0},
-#endif 
-  };
-
-
-/*
-** The following resources are reobtained using XtGetApplicationResources
-** in the initialize proc.
-*/
-
-/* The colormap */
-static XtResource initializeResources[] = {
-  /* reobtain the colormap with the new visual */
-  {XtNcolormap, XtCColormap, XtRColormap, sizeof(Colormap),
-   XtOffset(GLwDrawingAreaWidget, core.colormap),
-   XtRCallProc,(XtPointer) createColormap},
-  };
-
-
-/* reallocate any colors we need in the new colormap */
-  
-/* The background is obtained only if the allocateBackground resource is TRUE*/
-static XtResource backgroundResources[] = {
-#ifdef __GLX_MOTIF
-  {XmNbackground, XmCBackground,XmRPixel, 
-   sizeof(Pixel),XtOffset(GLwDrawingAreaWidget,core.background_pixel),
-   XmRString,(XtPointer)"lightgrey"},
-   /*XmRCallProc,(XtPointer)_XmBackgroundColorDefault},*/
-
-  {XmNbackgroundPixmap,XmCPixmap,XmRXmBackgroundPixmap, 
-   sizeof(Pixmap),XtOffset(GLwDrawingAreaWidget,core.background_pixmap),
-   XmRImmediate,(XtPointer)XmUNSPECIFIED_PIXMAP},
-
-#else
-  {XtNbackground,XtCBackground,XtRPixel,sizeof(Pixel),
-   XtOffset(GLwDrawingAreaWidget,core.background_pixel),
-   XtRString,(XtPointer)"lightgrey"},
-   /*XtRString,(XtPointer)"XtDefaultBackground"},*/
-
-  {XtNbackgroundPixmap, XtCPixmap, XtRPixmap, sizeof(Pixmap),
-   XtOffset(GLwDrawingAreaWidget,core.background_pixmap),
-   XtRImmediate,(XtPointer)XtUnspecifiedPixmap},
-#endif  
-  };
-
-
-
-/* The other colors such as the foreground are allocated only if
- * allocateOtherColors are set.  These resources only exist in Motif.
- */
-#ifdef __GLX_MOTIF
-static XtResource otherColorResources[] = {
-  {XmNforeground,XmCForeground,XmRPixel, 
-   sizeof(Pixel),XtOffset(GLwDrawingAreaWidget,primitive.foreground),
-   XmRString,(XtPointer)"lighgrey"},
-   /*XmRCallProc, (XtPointer) _XmForegroundColorDefault},*/
-
-  {XmNhighlightColor,XmCHighlightColor,XmRPixel,sizeof(Pixel),
-   XtOffset(GLwDrawingAreaWidget,primitive.highlight_color),
-   XmRString,(XtPointer)"lightgrey"},
-   /*XmRCallProc,(XtPointer)_XmHighlightColorDefault},*/
-
-  {XmNhighlightPixmap,XmCHighlightPixmap,XmRPrimHighlightPixmap,
-   sizeof(Pixmap),
-   XtOffset(GLwDrawingAreaWidget,primitive.highlight_pixmap),
-   XmRImmediate,(XtPointer)XmUNSPECIFIED_PIXMAP},
-   /*XmRCallProc,(XtPointer)_XmPrimitiveHighlightPixmapDefault},*/
-  };
-#endif
-
-
-#undef offset
-
-
-GLwDrawingAreaClassRec glwDrawingAreaClassRec = {
-  { /* core fields */
-#ifdef __GLX_MOTIF
-    /* superclass                */        (WidgetClass) &xmPrimitiveClassRec,
-    /* class_name                */        "GLwMDrawingArea",
-#else /* not __GLX_MOTIF */
-    /* superclass                */        (WidgetClass) &widgetClassRec,
-    /* class_name                */        "GLwDrawingArea",
-#endif /* __GLX_MOTIF */
-    /* widget_size               */        sizeof(GLwDrawingAreaRec),
-    /* class_initialize          */        NULL,
-    /* class_part_initialize     */        NULL,
-    /* class_inited              */        FALSE,
-    /* initialize                */        (XtInitProc) Initialize,
-    /* initialize_hook           */        NULL,
-    /* realize                   */        Realize,
-    /* actions                   */        actions,
-    /* num_actions               */        XtNumber(actions),
-    /* resources                 */        resources,
-    /* num_resources             */        XtNumber(resources),
-    /* xrm_class                 */        NULLQUARK,
-    /* compress_motion           */        TRUE,
-    /* compress_exposure         */        TRUE,
-    /* compress_enterleave       */        TRUE,
-    /* visible_interest          */        TRUE,
-    /* destroy                   */        (XtWidgetProc) Destroy,
-    /* resize                    */        (XtWidgetProc) Resize,
-    /* expose                    */        (XtExposeProc) Redraw,
-    /* set_values                */        NULL,
-    /* set_values_hook           */        NULL,
-    /* set_values_almost         */        XtInheritSetValuesAlmost,
-    /* get_values_hook           */        NULL,
-    /* accept_focus              */        NULL,
-    /* version                   */        XtVersion,
-    /* callback_private          */        NULL,
-    /* tm_table                  */        defaultTranslations,
-    /* query_geometry            */        XtInheritQueryGeometry,
-    /* display_accelerator       */        XtInheritDisplayAccelerator,
-    /* extension                 */        NULL
-  },
-#ifdef __GLX_MOTIF /* primitive resources */
-  {
-    /* border_highlight          */        XmInheritBorderHighlight,
-    /* border_unhighlight        */        XmInheritBorderUnhighlight,
-    /* translations              */        XtInheritTranslations,
-    /* arm_and_activate          */        NULL,
-    /* get_resources             */        NULL,
-    /* num get_resources         */        0,
-    /* extension                 */        NULL,                                
-  }
-#endif 
-  };
-
-WidgetClass glwDrawingAreaWidgetClass=(WidgetClass)&glwDrawingAreaClassRec;
-
-
-
-static void error(Widget w,char* string){
-  char buf[100];
-#ifdef __GLX_MOTIF
-  sprintf(buf,"GLwMDrawingArea: %s\n",string);
-#else
-  sprintf(buf,"GLwDrawingArea: %s\n",string);
-#endif
-  XtAppError(XtWidgetToApplicationContext(w),buf);
-  }
-
-
-static void warning(Widget w,char* string){
-  char buf[100];
-#ifdef __GLX_MOTIF
-  sprintf (buf, "GLwMDraw: %s\n", string);
-#else
-  sprintf (buf, "GLwDraw: %s\n", string);
-#endif
-  XtAppWarning(XtWidgetToApplicationContext(w), buf);
-  }
-
-
-
-/* Initialize the attribList based on the attributes */
-static void createAttribList(GLwDrawingAreaWidget w){
-  int *ptr;
-  w->glwDrawingArea.attribList = (int*)XtMalloc(ATTRIBLIST_SIZE*sizeof(int));
-  if(!w->glwDrawingArea.attribList){
-    error((Widget)w,"Unable to allocate attribute list");
-    }
-  ptr = w->glwDrawingArea.attribList;
-  *ptr++ = GLX_BUFFER_SIZE;
-  *ptr++ = w->glwDrawingArea.bufferSize;
-  *ptr++ = GLX_LEVEL;
-  *ptr++ = w->glwDrawingArea.level;
-  if(w->glwDrawingArea.rgba) *ptr++ = GLX_RGBA;
-  if(w->glwDrawingArea.doublebuffer) *ptr++ = GLX_DOUBLEBUFFER;
-  if(w->glwDrawingArea.stereo) *ptr++ = GLX_STEREO;
-  *ptr++ = GLX_AUX_BUFFERS;
-  *ptr++ = w->glwDrawingArea.auxBuffers;
-  *ptr++ = GLX_RED_SIZE;
-  *ptr++ = w->glwDrawingArea.redSize;
-  *ptr++ = GLX_GREEN_SIZE;
-  *ptr++ = w->glwDrawingArea.greenSize;
-  *ptr++ = GLX_BLUE_SIZE;
-  *ptr++ = w->glwDrawingArea.blueSize;
-  *ptr++ = GLX_ALPHA_SIZE;
-  *ptr++ = w->glwDrawingArea.alphaSize;
-  *ptr++ = GLX_DEPTH_SIZE;
-  *ptr++ = w->glwDrawingArea.depthSize;
-  *ptr++ = GLX_STENCIL_SIZE;
-  *ptr++ = w->glwDrawingArea.stencilSize;
-  *ptr++ = GLX_ACCUM_RED_SIZE;
-  *ptr++ = w->glwDrawingArea.accumRedSize;
-  *ptr++ = GLX_ACCUM_GREEN_SIZE;
-  *ptr++ = w->glwDrawingArea.accumGreenSize;
-  *ptr++ = GLX_ACCUM_BLUE_SIZE;
-  *ptr++ = w->glwDrawingArea.accumBlueSize;
-  *ptr++ = GLX_ACCUM_ALPHA_SIZE;
-  *ptr++ = w->glwDrawingArea.accumAlphaSize;
-  *ptr++ = None;
-  assert((ptr-w->glwDrawingArea.attribList)<ATTRIBLIST_SIZE);
-  }
-
-
-
-/* Initialize the visualInfo based on the attribute list */
-static void createVisualInfo(GLwDrawingAreaWidget w){
-  assert(w->glwDrawingArea.attribList);
-  w->glwDrawingArea.visualInfo=glXChooseVisual(XtDisplay(w),XScreenNumberOfScreen(XtScreen(w)),w->glwDrawingArea.attribList);
-  if(!w->glwDrawingArea.visualInfo) error((Widget)w,"requested visual not supported");
-  }
-
-
-
-/* Initialize the colormap based on the visual info.
- * This routine maintains a cache of visual-infos to colormaps.  If two
- * widgets share the same visual info, they share the same colormap.
- * This function is called by the callProc of the colormap resource entry.
- */
-static void createColormap(GLwDrawingAreaWidget w,int offset,XrmValue *value){
-  static struct cmapCache { Visual *visual; Colormap cmap; } *cmapCache;
-  static int cacheEntries=0;
-  static int cacheMalloced=0;
-  register int i;
-    
-  assert(w->glwDrawingArea.visualInfo);
-
-  /* see if we can find it in the cache */
-  for(i=0; i<cacheEntries; i++){
-    if(cmapCache[i].visual==w->glwDrawingArea.visualInfo->visual){
-      value->addr=(XtPointer)(&cmapCache[i].cmap);
-      return;
-      }
-    }
-
-  /* not in the cache, create a new entry */
-  if(cacheEntries >= cacheMalloced){
-    /* need to malloc a new one.  Since we are likely to have only a
-     * few colormaps, we allocate one the first time, and double
-     * each subsequent time.
-     */
-    if(cacheMalloced==0){
-      cacheMalloced=1;
-      cmapCache=(struct cmapCache*)XtMalloc(sizeof(struct cmapCache));
-      }
-    else{
-      cacheMalloced<<=1;
-      cmapCache=(struct cmapCache*)XtRealloc((char*)cmapCache,sizeof(struct cmapCache)*cacheMalloced);
-      }
-    }
-       
-  cmapCache[cacheEntries].cmap=XCreateColormap(XtDisplay(w),
-                                               RootWindow(XtDisplay(w),
-                                               w->glwDrawingArea.visualInfo->screen),
-                                               w->glwDrawingArea.visualInfo->visual,
-                                               AllocNone);
-  cmapCache[cacheEntries].visual=w->glwDrawingArea.visualInfo->visual;
-  value->addr=(XtPointer)(&cmapCache[cacheEntries++].cmap);
-  }
-
-
-
-static void Initialize(GLwDrawingAreaWidget req,GLwDrawingAreaWidget neww,ArgList args,Cardinal *num_args){
-
-  /* fix size */
-  if(req->core.width==0) neww->core.width=100;
-  if(req->core.height==0) neww->core.width=100;
-
-  /* create the attribute list if needed */
-  neww->glwDrawingArea.myList=FALSE;
-  if(neww->glwDrawingArea.attribList==NULL){
-    neww->glwDrawingArea.myList=TRUE;
-    createAttribList(neww);
-    }
-
-  /* Gotta have it */
-  assert(neww->glwDrawingArea.attribList);
-
-  /* determine the visual info if needed */
-  neww->glwDrawingArea.myVisual=FALSE;
-  if(neww->glwDrawingArea.visualInfo==NULL){
-    neww->glwDrawingArea.myVisual=TRUE;
-    createVisualInfo(neww);
-    }
-
-  /* Gotta have that too */
-  assert(neww->glwDrawingArea.visualInfo);
-
-  neww->core.depth=neww->glwDrawingArea.visualInfo->depth;
-
-  /* Reobtain the colormap and colors in it using XtGetApplicationResources*/
-  XtGetApplicationResources((Widget)neww,neww,initializeResources,XtNumber(initializeResources),args,*num_args);
-
-  /* obtain the color resources if appropriate */
-  if(req->glwDrawingArea.allocateBackground){
-    XtGetApplicationResources((Widget)neww,neww,backgroundResources,XtNumber(backgroundResources),args,*num_args);
-    }
-
-#ifdef __GLX_MOTIF
-  if(req->glwDrawingArea.allocateOtherColors){
-    XtGetApplicationResources((Widget)neww,neww,otherColorResources,XtNumber(otherColorResources),args,*num_args);
-    }
-#endif 
-  }
-
-
-
-static void Realize(Widget w,Mask *valueMask,XSetWindowAttributes *attributes){
-  register GLwDrawingAreaWidget glw=(GLwDrawingAreaWidget)w;
-  GLwDrawingAreaCallbackStruct cb;
-  Widget parentShell;
-  Status status;
-  Window windows[2],*windowsReturn,*windowList;
-  int countReturn,i;
-   
-  /* if we haven't requested that the background be both installed and
-   * allocated, don't install it.
-   */
-  if(!(glw->glwDrawingArea.installBackground && glw->glwDrawingArea.allocateBackground)){
-    *valueMask&=~CWBackPixel;
-    }
- 
-  XtCreateWindow(w,(unsigned int)InputOutput,glw->glwDrawingArea.visualInfo->visual,*valueMask,attributes);
-
-  /* if appropriate, call XSetWMColormapWindows to install the colormap */
-  if(glw->glwDrawingArea.installColormap){
-
-    /* Get parent shell */
-    for(parentShell=XtParent(w); parentShell&&!XtIsShell(parentShell); parentShell=XtParent(parentShell));
-
-    if(parentShell && XtWindow(parentShell)){
-
-      /* check to see if there is already a property */
-      status=XGetWMColormapWindows(XtDisplay(parentShell),XtWindow(parentShell),&windowsReturn,&countReturn);
-            
-      /* if no property, just create one */
-      if(!status){
-        windows[0]=XtWindow(w);
-        windows[1]=XtWindow(parentShell);
-        XSetWMColormapWindows(XtDisplay(parentShell),XtWindow(parentShell),windows,2);
-        }
-
-      /* there was a property, add myself to the beginning */
-      else{
-        windowList=(Window *)XtMalloc((sizeof(Window))*(countReturn+1));
-        windowList[0]=XtWindow(w);
-        for(i=0; i<countReturn; i++) windowList[i+1]=windowsReturn[i];
-        XSetWMColormapWindows(XtDisplay(parentShell),XtWindow(parentShell),windowList,countReturn+1);
-        XtFree((char*)windowList);
-        XtFree((char*)windowsReturn);
-        }
-      }
-    else{
-      warning(w,"Could not set colormap property on parent shell");
-      }
-    }
-
-  /* Invoke callbacks */
-  cb.reason=GLwCR_GINIT;
-  cb.event=NULL;
-  cb.width=glw->core.width;
-  cb.height=glw->core.height;
-  XtCallCallbackList((Widget)glw,glw->glwDrawingArea.ginitCallback,&cb);
-  }
-
-
-
-static void Redraw(GLwDrawingAreaWidget w,XEvent *event,Region region){
-  GLwDrawingAreaCallbackStruct cb;
-  if(!XtIsRealized((Widget)w)) return;
-  cb.reason=GLwCR_EXPOSE;
-  cb.event=event;
-  cb.width=w->core.width;
-  cb.height=w->core.height;
-  XtCallCallbackList((Widget)w,w->glwDrawingArea.exposeCallback,&cb);
-  }
-
-
-
-static void Resize(GLwDrawingAreaWidget glw){
-  GLwDrawingAreaCallbackStruct cb;
-  if(!XtIsRealized((Widget)glw)) return;
-  cb.reason=GLwCR_RESIZE;
-  cb.event=NULL;
-  cb.width=glw->core.width;
-  cb.height=glw->core.height;
-  XtCallCallbackList((Widget)glw,glw->glwDrawingArea.resizeCallback,&cb);
-  }
-
-
-
-static void Destroy(GLwDrawingAreaWidget glw){
-  Window *windowsReturn;
-  Widget parentShell;
-  Status status;
-  int countReturn;
-  register int i;
-
-  if(glw->glwDrawingArea.myList && glw->glwDrawingArea.attribList){
-    XtFree((XtPointer)glw->glwDrawingArea.attribList);
-    }
-
-  if(glw->glwDrawingArea.myVisual && glw->glwDrawingArea.visualInfo){
-    XtFree((XtPointer)glw->glwDrawingArea.visualInfo);
-    }
-
-  /* if my colormap was installed, remove it */
-  if(glw->glwDrawingArea.installColormap){
-
-    /* Get parent shell */
-    for(parentShell=XtParent(glw); parentShell&&!XtIsShell(parentShell); parentShell=XtParent(parentShell));
-
-    if(parentShell && XtWindow(parentShell)){
-
-      /* make sure there is a property */
-      status=XGetWMColormapWindows(XtDisplay(parentShell),XtWindow(parentShell),&windowsReturn,&countReturn);
-            
-      /* if no property, just return.  If there was a property, continue */
-      if(status){
-
-        /* search for a match */
-        for(i=0; i<countReturn; i++){
-          if(windowsReturn[i]==XtWindow(glw)){
-
-            /* we found a match, now copy the rest down */
-            for(i++; i<countReturn; i++){ windowsReturn[i-1]=windowsReturn[i]; }
-
-            XSetWMColormapWindows(XtDisplay(parentShell),XtWindow(parentShell),windowsReturn,countReturn-1);
-            break; 
-            }
-          }
-        XtFree((char *)windowsReturn);
-        }
-      }
-    }
-  }
-
-
-
-/* Action routine for keyboard and mouse events */
-static void glwInput(GLwDrawingAreaWidget glw,XEvent *event,String *params,Cardinal *numParams){
-  GLwDrawingAreaCallbackStruct cb;
-  cb.reason=GLwCR_INPUT;
-  cb.event=event;
-  cb.width=glw->core.width;
-  cb.height=glw->core.height;
-  XtCallCallbackList((Widget)glw,glw->glwDrawingArea.inputCallback,&cb);
-  }
-
-
-#ifdef __GLX_MOTIF
-
-/* Create routine */
-Widget GLwCreateMDrawingArea(Widget parent, char *name,ArgList arglist,Cardinal argcount){
-  return XtCreateWidget(name,glwMDrawingAreaWidgetClass, parent, arglist,argcount);
-  }
-
-#endif
-
-
-#ifndef __GLX_MOTIF
-
-/* Make context current */
-void GLwDrawingAreaMakeCurrent(Widget w,GLXContext ctx){
-  glXMakeCurrent(XtDisplay(w),XtWindow(w),ctx);
-  }
-
-
-/* Swap buffers convenience function */
-void GLwDrawingAreaSwapBuffers(Widget w){
-  glXSwapBuffers(XtDisplay(w),XtWindow(w));
-  }
-
-#endif
diff --git a/src/glw/GLwDrawA.h b/src/glw/GLwDrawA.h
deleted file mode 100644
index b9711c216bc..00000000000
--- a/src/glw/GLwDrawA.h
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * (c) Copyright 1993, Silicon Graphics, Inc.
- * ALL RIGHTS RESERVED 
- * Permission to use, copy, modify, and distribute this software for 
- * any purpose and without fee is hereby granted, provided that the above
- * copyright notice appear in all copies and that both the copyright notice
- * and this permission notice appear in supporting documentation, and that 
- * the name of Silicon Graphics, Inc. not be used in advertising
- * or publicity pertaining to distribution of the software without specific,
- * written prior permission. 
- *
- * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS"
- * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR
- * FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT SHALL SILICON
- * GRAPHICS, INC.  BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT,
- * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY
- * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION,
- * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF
- * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC.  HAS BEEN
- * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
- * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE.
- * 
- * 
- * US Government Users Restricted Rights 
- * Use, duplication, or disclosure by the Government is subject to
- * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph
- * (c)(1)(ii) of the Rights in Technical Data and Computer Software
- * clause at DFARS 252.227-7013 and/or in similar or successor
- * clauses in the FAR or the DOD or NASA FAR Supplement.
- * Unpublished-- rights reserved under the copyright laws of the
- * United States.  Contractor/manufacturer is Silicon Graphics,
- * Inc., 2011 N.  Shoreline Blvd., Mountain View, CA 94039-7311.
- *
- * OpenGL(TM) is a trademark of Silicon Graphics, Inc.
- */
-#ifndef _GLwDrawA_h
-#define _GLwDrawA_h
-
-#include <GL/glx.h>
-#include <GL/gl.h>
-
-/****************************************************************
- *
- * GLwDrawingArea widgets
- *
- ****************************************************************/
-
-/* Resources:
-
- Name		     Class		RepType		Default Value
- ----		     -----		-------		-------------
- attribList	     AttribList		int *		NULL
- visualInfo	     VisualInfo		VisualInfo	NULL
- installColormap     InstallColormap	Boolean		TRUE
- allocateBackground  AllocateColors	Boolean		FALSE
- allocateOtherColors AllocateColors	Boolean		FALSE
- installBackground   InstallBackground	Boolean		TRUE
- exposeCallback      Callback		Pointer		NULL
- ginitCallback       Callback		Pointer		NULL
- inputCallback       Callback		Pointer		NULL
- resizeCallback      Callback		Pointer		NULL
-
-*** The following resources all correspond to the GLX configuration
-*** attributes and are used to create the attribList if it is NULL
- bufferSize	     BufferSize		int		0
- level		     Level		int		0
- rgba		     Rgba		Boolean		FALSE
- doublebuffer	     Doublebuffer	Boolean		FALSE
- stereo		     Stereo		Boolean		FALSE
- auxBuffers	     AuxBuffers		int		0
- redSize	     ColorSize		int		1
- greenSize	     ColorSize		int		1
- blueSize	     ColorSize		int		1
- alphaSize	     AlphaSize		int		0
- depthSize	     DepthSize		int		0
- stencilSize	     StencilSize	int		0
- accumRedSize	     AccumColorSize	int		0
- accumGreenSize	     AccumColorSize	int		0
- accumBlueSize	     AccumColorSize	int		0
- accumAlphaSize	     AccumAlphaSize	int		0
-*/
-
-#define GLwNattribList		"attribList"
-#define GLwCAttribList		"AttribList"
-#define GLwNvisualInfo		"visualInfo"
-#define GLwCVisualInfo		"VisualInfo"
-#define GLwRVisualInfo		"VisualInfo"
-
-#define GLwNinstallColormap	"installColormap"
-#define GLwCInstallColormap	"InstallColormap"
-#define GLwNallocateBackground	"allocateBackground"
-#define GLwNallocateOtherColors	"allocateOtherColors"
-#define GLwCAllocateColors	"AllocateColors"
-#define GLwNinstallBackground	"installBackground"
-#define GLwCInstallBackground	"InstallBackground"
-
-#define GLwCCallback		"Callback"
-#define GLwNexposeCallback	"exposeCallback"
-#define GLwNginitCallback	"ginitCallback"
-#define GLwNresizeCallback	"resizeCallback"
-#define GLwNinputCallback	"inputCallback"
-
-#define GLwNbufferSize		"bufferSize"
-#define GLwCBufferSize		"BufferSize"
-#define GLwNlevel		"level"
-#define GLwCLevel		"Level"
-#define GLwNrgba		"rgba"
-#define GLwCRgba		"Rgba"
-#define GLwNdoublebuffer	"doublebuffer"
-#define GLwCDoublebuffer	"Doublebuffer"
-#define GLwNstereo		"stereo"
-#define GLwCStereo		"Stereo"
-#define GLwNauxBuffers		"auxBuffers"
-#define GLwCAuxBuffers		"AuxBuffers"
-#define GLwNredSize		"redSize"
-#define GLwNgreenSize		"greenSize"
-#define GLwNblueSize		"blueSize"
-#define GLwCColorSize		"ColorSize"
-#define GLwNalphaSize		"alphaSize"
-#define GLwCAlphaSize		"AlphaSize"
-#define GLwNdepthSize		"depthSize"
-#define GLwCDepthSize		"DepthSize"
-#define GLwNstencilSize		"stencilSize"
-#define GLwCStencilSize		"StencilSize"
-#define GLwNaccumRedSize	"accumRedSize"
-#define GLwNaccumGreenSize	"accumGreenSize"
-#define GLwNaccumBlueSize	"accumBlueSize"
-#define GLwCAccumColorSize	"AccumColorSize"
-#define GLwNaccumAlphaSize	"accumAlphaSize"
-#define GLwCAccumAlphaSize	"AccumAlphaSize"
-
-#ifdef __GLX_MOTIF
-
-typedef struct _GLwMDrawingAreaClassRec	*GLwMDrawingAreaWidgetClass;
-typedef struct _GLwMDrawingAreaRec	*GLwMDrawingAreaWidget;
-
-GLAPI WidgetClass glwMDrawingAreaWidgetClass;
-
-
-#else 
-
-typedef struct _GLwDrawingAreaClassRec	*GLwDrawingAreaWidgetClass;
-typedef struct _GLwDrawingAreaRec	*GLwDrawingAreaWidget;
-
-GLAPI WidgetClass glwDrawingAreaWidgetClass;
-
-
-#endif
-
-
-/* Callback reasons */
-#ifdef __GLX_MOTIF
-#define GLwCR_EXPOSE	XmCR_EXPOSE
-#define GLwCR_RESIZE	XmCR_RESIZE
-#define GLwCR_INPUT	XmCR_INPUT
-#else 
-/* The same values as Motif, but don't use Motif constants */
-#define GLwCR_EXPOSE	38
-#define GLwCR_RESIZE	39
-#define GLwCR_INPUT	40
-#endif
-
-#define GLwCR_GINIT	32135	/* Arbitrary number that should neverr clash */
-
-typedef struct 
-  {
-  int       reason;
-  XEvent   *event;
-  Dimension width,height;
-  } 
-  GLwDrawingAreaCallbackStruct;
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
-/* front ends to glXMakeCurrent and glXSwapBuffers */
-GLAPI void GLwDrawingAreaMakeCurrent(Widget w,GLXContext ctx);
-GLAPI void GLwDrawingAreaSwapBuffers(Widget w);
-
-#ifdef __GLX_MOTIF
-#ifdef _NO_PROTO
-GLAPI Widget GLwCreateMDrawingArea();
-#else
-GLAPI Widget GLwCreateMDrawingArea(Widget parent,char *name,ArgList arglist,Cardinal argcount);
-#endif
-#endif 
-
-#if defined(__cplusplus) || defined(c_plusplus)
-}
-#endif
-
-#endif
diff --git a/src/glw/GLwDrawAP.h b/src/glw/GLwDrawAP.h
deleted file mode 100644
index 4ff21b426dd..00000000000
--- a/src/glw/GLwDrawAP.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * (c) Copyright 1993, Silicon Graphics, Inc.
- * ALL RIGHTS RESERVED 
- * Permission to use, copy, modify, and distribute this software for 
- * any purpose and without fee is hereby granted, provided that the above
- * copyright notice appear in all copies and that both the copyright notice
- * and this permission notice appear in supporting documentation, and that 
- * the name of Silicon Graphics, Inc. not be used in advertising
- * or publicity pertaining to distribution of the software without specific,
- * written prior permission. 
- *
- * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS"
- * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR
- * FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT SHALL SILICON
- * GRAPHICS, INC.  BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT,
- * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY
- * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION,
- * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF
- * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC.  HAS BEEN
- * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
- * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE.
- * 
- * 
- * US Government Users Restricted Rights 
- * Use, duplication, or disclosure by the Government is subject to
- * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph
- * (c)(1)(ii) of the Rights in Technical Data and Computer Software
- * clause at DFARS 252.227-7013 and/or in similar or successor
- * clauses in the FAR or the DOD or NASA FAR Supplement.
- * Unpublished-- rights reserved under the copyright laws of the
- * United States.  Contractor/manufacturer is Silicon Graphics,
- * Inc., 2011 N.  Shoreline Blvd., Mountain View, CA 94039-7311.
- *
- * OpenGL(TM) is a trademark of Silicon Graphics, Inc.
- */
-#ifndef _GLwDrawAP_h
-#define _GLwDrawAP_h
-
-
-/* MOTIF */
-#ifdef __GLX_MOTIF
-#include "GLwMDrawA.h"
-#else
-#include "GLwDrawA.h"
-#endif
-
-typedef struct _GLwDrawingAreaClassPart {
-  caddr_t extension;
-  } GLwDrawingAreaClassPart;
-
-
-#ifdef __GLX_MOTIF
-typedef struct _GLwMDrawingAreaClassRec {
-  CoreClassPart               core_class;
-  XmPrimitiveClassPart        primitive_class;
-  GLwDrawingAreaClassPart     glwDrawingArea_class;
-  } GLwMDrawingAreaClassRec;
-
-
-GLAPI GLwMDrawingAreaClassRec glwMDrawingAreaClassRec;
-
-
-/* XT */
-#else 
-
-typedef struct _GLwDrawingAreaClassRec {
-  CoreClassPart               core_class;
-  GLwDrawingAreaClassPart     glwDrawingArea_class;
-  } GLwDrawingAreaClassRec;
-
-GLAPI GLwDrawingAreaClassRec glwDrawingAreaClassRec;
-
-
-#endif 
-
-
-
-typedef struct {
-  /* resources */
-  int *                attribList;
-  XVisualInfo *        visualInfo;
-  Boolean              myList;                /* TRUE if we malloced the attribList*/
-  Boolean              myVisual;        /* TRUE if we created the visualInfo*/
-  Boolean              installColormap;
-  Boolean              allocateBackground;
-  Boolean              allocateOtherColors;
-  Boolean              installBackground;
-  XtCallbackList       ginitCallback;
-  XtCallbackList       resizeCallback;
-  XtCallbackList       exposeCallback;
-  XtCallbackList       inputCallback;
-  /* specific attributes; add as we get new attributes */
-  int                  bufferSize;
-  int                  level;
-  Boolean              rgba;
-  Boolean              doublebuffer;
-  Boolean              stereo;
-  int                  auxBuffers;
-  int                  redSize;
-  int                  greenSize;
-  int                  blueSize;
-  int                  alphaSize;
-  int                  depthSize;
-  int                  stencilSize;
-  int                  accumRedSize;
-  int                  accumGreenSize;
-  int                  accumBlueSize;
-  int                  accumAlphaSize;
-  } GLwDrawingAreaPart;
-
-#ifdef __GLX_MOTIF
-
-typedef struct _GLwMDrawingAreaRec {
-  CorePart             core;
-  XmPrimitivePart      primitive;
-  GLwDrawingAreaPart   glwDrawingArea;
-  } GLwMDrawingAreaRec;
-
-#else 
-
-typedef struct _GLwDrawingAreaRec {
-  CorePart             core;
-  GLwDrawingAreaPart   glwDrawingArea;
-  } GLwDrawingAreaRec;
-
-#endif 
-
-#endif
diff --git a/src/glw/GLwMDrawA.c b/src/glw/GLwMDrawA.c
deleted file mode 100644
index bdefe92a6d0..00000000000
--- a/src/glw/GLwMDrawA.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * (c) Copyright 1993, Silicon Graphics, Inc.
- * ALL RIGHTS RESERVED 
- * Permission to use, copy, modify, and distribute this software for 
- * any purpose and without fee is hereby granted, provided that the above
- * copyright notice appear in all copies and that both the copyright notice
- * and this permission notice appear in supporting documentation, and that 
- * the name of Silicon Graphics, Inc. not be used in advertising
- * or publicity pertaining to distribution of the software without specific,
- * written prior permission. 
- *
- * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS"
- * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR
- * FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT SHALL SILICON
- * GRAPHICS, INC.  BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT,
- * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY
- * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION,
- * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF
- * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC.  HAS BEEN
- * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
- * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE.
- * 
- * 
- * US Government Users Restricted Rights 
- * Use, duplication, or disclosure by the Government is subject to
- * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph
- * (c)(1)(ii) of the Rights in Technical Data and Computer Software
- * clause at DFARS 252.227-7013 and/or in similar or successor
- * clauses in the FAR or the DOD or NASA FAR Supplement.
- * Unpublished-- rights reserved under the copyright laws of the
- * United States.  Contractor/manufacturer is Silicon Graphics,
- * Inc., 2011 N.  Shoreline Blvd., Mountain View, CA 94039-7311.
- *
- * OpenGL(TM) is a trademark of Silicon Graphics, Inc.
- */
-#ifndef __GLX_MOTIF
-#define __GLX_MOTIF 1
-#endif
-#include "GLwDrawA.c"
diff --git a/src/glw/GLwMDrawA.h b/src/glw/GLwMDrawA.h
deleted file mode 100644
index 2e245890410..00000000000
--- a/src/glw/GLwMDrawA.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * (c) Copyright 1993, Silicon Graphics, Inc.
- * ALL RIGHTS RESERVED 
- * Permission to use, copy, modify, and distribute this software for 
- * any purpose and without fee is hereby granted, provided that the above
- * copyright notice appear in all copies and that both the copyright notice
- * and this permission notice appear in supporting documentation, and that 
- * the name of Silicon Graphics, Inc. not be used in advertising
- * or publicity pertaining to distribution of the software without specific,
- * written prior permission. 
- *
- * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS"
- * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR
- * FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT SHALL SILICON
- * GRAPHICS, INC.  BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT,
- * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY
- * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION,
- * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF
- * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC.  HAS BEEN
- * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
- * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE.
- * 
- * 
- * US Government Users Restricted Rights 
- * Use, duplication, or disclosure by the Government is subject to
- * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph
- * (c)(1)(ii) of the Rights in Technical Data and Computer Software
- * clause at DFARS 252.227-7013 and/or in similar or successor
- * clauses in the FAR or the DOD or NASA FAR Supplement.
- * Unpublished-- rights reserved under the copyright laws of the
- * United States.  Contractor/manufacturer is Silicon Graphics,
- * Inc., 2011 N.  Shoreline Blvd., Mountain View, CA 94039-7311.
- *
- * OpenGL(TM) is a trademark of Silicon Graphics, Inc.
- */
-#ifndef __GLX_MOTIF
-#define __GLX_MOTIF 1
-#endif
-#include "GLwDrawA.h"
diff --git a/src/glw/GLwMDrawAP.h b/src/glw/GLwMDrawAP.h
deleted file mode 100644
index a0a689bb996..00000000000
--- a/src/glw/GLwMDrawAP.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * (c) Copyright 1993, Silicon Graphics, Inc.
- * ALL RIGHTS RESERVED 
- * Permission to use, copy, modify, and distribute this software for 
- * any purpose and without fee is hereby granted, provided that the above
- * copyright notice appear in all copies and that both the copyright notice
- * and this permission notice appear in supporting documentation, and that 
- * the name of Silicon Graphics, Inc. not be used in advertising
- * or publicity pertaining to distribution of the software without specific,
- * written prior permission. 
- *
- * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS"
- * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR
- * FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT SHALL SILICON
- * GRAPHICS, INC.  BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT,
- * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY
- * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION,
- * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF
- * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC.  HAS BEEN
- * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
- * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE.
- * 
- * 
- * US Government Users Restricted Rights 
- * Use, duplication, or disclosure by the Government is subject to
- * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph
- * (c)(1)(ii) of the Rights in Technical Data and Computer Software
- * clause at DFARS 252.227-7013 and/or in similar or successor
- * clauses in the FAR or the DOD or NASA FAR Supplement.
- * Unpublished-- rights reserved under the copyright laws of the
- * United States.  Contractor/manufacturer is Silicon Graphics,
- * Inc., 2011 N.  Shoreline Blvd., Mountain View, CA 94039-7311.
- *
- * OpenGL(TM) is a trademark of Silicon Graphics, Inc.
- */
-#ifndef __GLX_MOTIF
-#define __GLX_MOTIF 1
-#endif
-#include "GLwDrawAP.h"
diff --git a/src/glw/Makefile b/src/glw/Makefile
deleted file mode 100644
index 776b1aa5bfb..00000000000
--- a/src/glw/Makefile
+++ /dev/null
@@ -1,74 +0,0 @@
-# src/glw/Makefile
-
-TOP = ../..
-include $(TOP)/configs/current
-
-MAJOR = 1
-MINOR = 0
-TINY = 0
-
-INCDIRS = -I$(TOP)/include $(MOTIF_CFLAGS) $(X11_INCLUDES)
-
-
-OBJECTS = $(GLW_SOURCES:.c=.o)
-
-
-
-##### RULES #####
-
-.c.o:
-	$(CC) -c $(INCDIRS) $(CFLAGS) $(GLW_CFLAGS) $<
-
-
-
-##### TARGETS #####
-
-default: $(TOP)/$(LIB_DIR)/$(GLW_LIB_NAME)
-
-# GLU pkg-config file
-pcedit = sed \
-	-e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \
-	-e 's,@INSTALL_LIB_DIR@,$(INSTALL_LIB_DIR),' \
-	-e 's,@INSTALL_INC_DIR@,$(INSTALL_INC_DIR),' \
-	-e 's,@VERSION@,$(MAJOR).$(MINOR).$(TINY),' \
-	-e 's,@GLW_PC_REQ_PRIV@,$(GLW_PC_REQ_PRIV),' \
-	-e 's,@GLW_PC_LIB_PRIV@,$(GLW_PC_LIB_PRIV),' \
-	-e 's,@GLW_PC_CFLAGS@,$(GLW_PC_CFLAGS),' \
-	-e 's,@GLW_LIB@,$(GLW_LIB),'
-glw.pc: glw.pc.in
-	$(pcedit) $< > $@
-
-install: glw.pc
-	$(INSTALL) -d $(DESTDIR)$(INSTALL_INC_DIR)/GL
-	$(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)
-	$(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig
-	$(INSTALL) -m 644 *.h $(DESTDIR)$(INSTALL_INC_DIR)/GL
-	$(MINSTALL) $(TOP)/$(LIB_DIR)/$(GLW_LIB_GLOB) $(DESTDIR)$(INSTALL_LIB_DIR)
-	$(INSTALL) -m 644 glw.pc $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig
-
-clean:
-	-rm -f depend depend.bak
-	-rm -f *.o *.pc *~
-
-
-# Make the library
-$(TOP)/$(LIB_DIR)/$(GLW_LIB_NAME): $(OBJECTS)
-	$(MKLIB) -o $(GLW_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
-		-major $(MAJOR) -minor $(MINOR) -patch $(TINY) \
-		$(MKLIB_OPTIONS) -install $(TOP)/$(LIB_DIR) \
-		-id $(INSTALL_LIB_DIR)/lib$(GLW_LIB).$(MAJOR).dylib \
-		$(GLW_LIB_DEPS) $(OBJECTS)
-
-
-#
-# Run 'make depend' to update the dependencies if you change what's included
-# by any source file.
-# 
-depend: $(GLW_SOURCES)
-	rm -f depend
-	touch depend
-	$(MKDEP) $(MKDEP_OPTIONS) -I$(TOP)/include $(GLW_SOURCES) \
-		$(X11_INCLUDES) > /dev/null
-
-
--include depend
diff --git a/src/glw/README b/src/glw/README
deleted file mode 100644
index 70f4f7bc2ee..00000000000
--- a/src/glw/README
+++ /dev/null
@@ -1,56 +0,0 @@
-
-                           widgets README file
-
-
-This directory contains the source code for SGI's OpenGL Xt/Motif widgets,
-slightly modified by Jeroen van der Zijp to work better with Mesa.
-
-To compile the widget code (producing lib/libGLw.a) cd to the widgets/
-directory and type 'make <config>' where <config> is the system configuration
-you used to compile Mesa (like 'make linux').  This hasn't been tested on
-many systems so let us know if you have trouble.
-
-If you want to make a Linux ELF shared lib instead of the non-shared .a
-file see the notes in the Makefile.
-
-If you want to build with Motif support, edit Makefile.X11, looking
-for the "Motif" information.
-
-The SGI copyright is as follows.
-
-
- * (c) Copyright 1993, Silicon Graphics, Inc.
- * ALL RIGHTS RESERVED 
- * Permission to use, copy, modify, and distribute this software for 
- * any purpose and without fee is hereby granted, provided that the above
- * copyright notice appear in all copies and that both the copyright notice
- * and this permission notice appear in supporting documentation, and that 
- * the name of Silicon Graphics, Inc. not be used in advertising
- * or publicity pertaining to distribution of the software without specific,
- * written prior permission. 
- *
- * THE MATERIAL EMBODIED ON THIS SOFTWARE IS PROVIDED TO YOU "AS-IS"
- * AND WITHOUT WARRANTY OF ANY KIND, EXPRESS, IMPLIED OR OTHERWISE,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR
- * FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT SHALL SILICON
- * GRAPHICS, INC.  BE LIABLE TO YOU OR ANYONE ELSE FOR ANY DIRECT,
- * SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY
- * KIND, OR ANY DAMAGES WHATSOEVER, INCLUDING WITHOUT LIMITATION,
- * LOSS OF PROFIT, LOSS OF USE, SAVINGS OR REVENUE, OR THE CLAIMS OF
- * THIRD PARTIES, WHETHER OR NOT SILICON GRAPHICS, INC.  HAS BEEN
- * ADVISED OF THE POSSIBILITY OF SUCH LOSS, HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
- * POSSESSION, USE OR PERFORMANCE OF THIS SOFTWARE.
- * 
- * 
- * US Government Users Restricted Rights 
- * Use, duplication, or disclosure by the Government is subject to
- * restrictions set forth in FAR 52.227.19(c)(2) or subparagraph
- * (c)(1)(ii) of the Rights in Technical Data and Computer Software
- * clause at DFARS 252.227-7013 and/or in similar or successor
- * clauses in the FAR or the DOD or NASA FAR Supplement.
- * Unpublished-- rights reserved under the copyright laws of the
- * United States.  Contractor/manufacturer is Silicon Graphics,
- * Inc., 2011 N.  Shoreline Blvd., Mountain View, CA 94039-7311.
- *
- * OpenGL(TM) is a trademark of Silicon Graphics, Inc.
diff --git a/src/glw/glw.pc.in b/src/glw/glw.pc.in
deleted file mode 100644
index 19a7c307c01..00000000000
--- a/src/glw/glw.pc.in
+++ /dev/null
@@ -1,13 +0,0 @@
-prefix=@INSTALL_DIR@
-exec_prefix=${prefix}
-libdir=@INSTALL_LIB_DIR@
-includedir=@INSTALL_INC_DIR@
-
-Name: glw
-Description: Mesa OpenGL widget library
-Requires: gl
-Requires.private: @GLW_PC_REQ_PRIV@
-Version: @VERSION@
-Libs: -L${libdir} -l@GLW_LIB@
-Libs.private: @GLW_PC_LIB_PRIV@
-Cflags: -I${includedir} @GLW_PC_CFLAGS@

From c548192cafdf7dfab4cf7e0a0734417ee16f1c94 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 5 Aug 2011 16:59:04 -0700
Subject: [PATCH 299/600] docs: Remove GLw from the documentation except for a
 new FAQ entry.

Also remove an outdated reference to GLEW being in tree.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 docs/autoconf.html   | 7 -------
 docs/download.html   | 1 -
 docs/faq.html        | 7 ++++++-
 docs/install.html    | 5 -----
 docs/sourcetree.html | 2 --
 5 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/docs/autoconf.html b/docs/autoconf.html
index 64bcbd48a67..895cf665c5b 100644
--- a/docs/autoconf.html
+++ b/docs/autoconf.html
@@ -20,7 +20,6 @@
 <li><a href="#library">Library Options</a></li>
   <ul>
   <li><a href="#glu">GLU</a></li>
-  <li><a href="#glw">GLw</a></li>
   </ul>
 <li><a href="#demos">Demo Program Options</a></li>
 </ol>
@@ -245,12 +244,6 @@ instructions</a>.
 on all drivers. This can be disable with the option
 <code>--disable-glu</code>.
 </li>
-
-<a name="glw">
-<li><b><em>GLw</em></b> - The libGLw library will be built by default
-if libGLU has been enabled. This can be disable with the option
-<code>--disable-glw</code>.
-</li>
 </ul>
 </p>
 
diff --git a/docs/download.html b/docs/download.html
index 3c4d5976c2c..4e8fc2f02f0 100644
--- a/docs/download.html
+++ b/docs/download.html
@@ -84,7 +84,6 @@ src/mesa	- sources for the main Mesa library and device drivers
 src/gallium     - sources for Gallium and Gallium drivers
 src/glu		- libGLU source code
 src/glx		- sources for building libGL with full GLX and DRI support
-src/glw		- Xt/Motif/OpenGL widget code
 </pre>
 
 If you downloaded and unpacked the MesaGLUT.x.y.z package:
diff --git a/docs/faq.html b/docs/faq.html
index 071381c5a1c..bf6545fd5f7 100644
--- a/docs/faq.html
+++ b/docs/faq.html
@@ -204,8 +204,13 @@ If you don't already have GLUT installed, you should grab
 </a></p>
 
 
+<h2><a name="part2">2.4 Where is the GLw library?</a></h2>
+<p>
+<a name="part2">GLw (OpenGL widget library) is now available from a separate <a href="http://cgit.freedesktop.org/mesa/glw/">git repository</a>.  Unless you're using very old Xt/Motif applications with OpenGL, you shouldn't need it.
+</a></p>
 
-<h2><a name="part2">2.4 What's the proper place for the libraries and headers?</a></h2>
+
+<h2><a name="part2">2.5 What's the proper place for the libraries and headers?</a></h2>
 <p>
 <a name="part2">On Linux-based systems you'll want to follow the
 </a><a href="http://oss.sgi.com/projects/ogl-sample/ABI/index.html"
diff --git a/docs/install.html b/docs/install.html
index e1018119a76..c86a755e4b6 100644
--- a/docs/install.html
+++ b/docs/install.html
@@ -157,9 +157,6 @@ lrwxrwxrwx    1 brian    users          20 Mar 26 07:53 libGLU.so.1 -> libGLU.so
 lrwxrwxrwx    1 brian    users          12 Mar 26 07:53 libglut.so -> libglut.so.3*
 lrwxrwxrwx    1 brian    users          16 Mar 26 07:53 libglut.so.3 -> libglut.so.3.7.1*
 -rwxr-xr-x    1 brian    users      597754 Mar 26 07:53 libglut.so.3.7.1*
-lrwxrwxrwx    1 brian    users          11 Mar 26 08:04 libGLw.so -> libGLw.so.1*
-lrwxrwxrwx    1 brian    users          15 Mar 26 08:04 libGLw.so.1 -> libGLw.so.1.0.0*
--rwxr-xr-x    1 brian    users       20750 Mar 26 08:04 libGLw.so.1.0.0*
 lrwxrwxrwx    1 brian    users          14 Mar 26 07:53 libOSMesa.so -> libOSMesa.so.6*
 lrwxrwxrwx    1 brian    users          23 Mar 26 07:53 libOSMesa.so.6 -> libOSMesa.so.6.1.060100*
 -rwxr-xr-x    1 brian    users       23871 Mar 26 07:53 libOSMesa.so.6.1.060100*
@@ -172,8 +169,6 @@ lrwxrwxrwx    1 brian    users          23 Mar 26 07:53 libOSMesa.so.6 -> libOSM
 <br>
 <b>libglut</b> is the GLUT library.
 <br>
-<b>libGLw</b> is the Xt/Motif OpenGL drawing area widget library.
-<br>
 <b>libOSMesa</b> is the OSMesa (Off-Screen) interface library.
 </p>
 
diff --git a/docs/sourcetree.html b/docs/sourcetree.html
index 2e2d1d3f275..713e25b019c 100644
--- a/docs/sourcetree.html
+++ b/docs/sourcetree.html
@@ -153,8 +153,6 @@ each directory.
   <li><b>glx</b> - The GLX library code for building libGL.  This is used for
          direct rendering drivers.  It will dynamically load one of the 
          xxx_dri.so drivers.
-  <li><b>glw</b> - Widgets for Xt/Motif.
-  <li><b>glew</b> - OpenGL Extension Wrangler library (used by demo programs)
   </ul>
 <li><b>progs</b> - OpenGL test and demonstration programs
 <li><b>lib</b> - where the GL libraries are placed

From 67b5a3267d639c31d3ac4073be877ffb0f5637d3 Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Tue, 2 Aug 2011 14:34:17 -0700
Subject: [PATCH 300/600] glsl: Perform implicit type conversions on function
 call out parameters.

When an out parameter undergoes an implicit type conversion, we need
to store it in a temporary, and then after the call completes, convert
the resulting value.  In other words, we convert code like the
following:

void f(out int x);
float value;
f(value);

Into IR that's equivalent to this:

void f(out int x);
float value;
int out_parameter_conversion;
f(out_parameter_conversion);
value = float(out_parameter_conversion);

This transformation needs to happen during ast-to-IR convertion (as
opposed to, say, a lowering pass), because it is invalid IR for formal
and actual parameters to have types that don't match.

Fixes piglit tests
spec/glsl-1.20/compiler/qualifiers/out-conversion-int-to-float.vert and
spec/glsl-1.20/execution/qualifiers/vs-out-conversion-*.shader_test,
and bug 39651.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=39651

Reviewed-by: Chad Versace <chad@chad-versace.us>
---
 src/glsl/ast_function.cpp | 76 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 71 insertions(+), 5 deletions(-)

diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
index 5b6ed3bc8f5..c49a33d0486 100644
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -134,6 +134,8 @@ match_function_by_name(exec_list *instructions, const char *name,
       }
    }
 
+   exec_list post_call_conversions;
+
    if (sig != NULL) {
       /* Verify that 'out' and 'inout' actual parameters are lvalues.  This
        * isn't done in ir_function::matching_signature because that function
@@ -141,6 +143,12 @@ match_function_by_name(exec_list *instructions, const char *name,
        *
        * Also, validate that 'const_in' formal parameters (an extension of our
        * IR) correspond to ir_constant actual parameters.
+       *
+       * Also, perform implicit conversion of arguments.  Note: to implicitly
+       * convert out parameters, we need to place them in a temporary
+       * variable, and do the conversion after the call takes place.  Since we
+       * haven't emitted the call yet, we'll place the post-call conversions
+       * in a temporary exec_list, and emit them later.
        */
       exec_list_iterator actual_iter = actual_parameters->iterator();
       exec_list_iterator formal_iter = sig->parameters.iterator();
@@ -185,8 +193,63 @@ match_function_by_name(exec_list *instructions, const char *name,
 	 }
 
 	 if (formal->type->is_numeric() || formal->type->is_boolean()) {
-	    ir_rvalue *converted = convert_component(actual, formal->type);
-	    actual->replace_with(converted);
+            switch (formal->mode) {
+            case ir_var_in: {
+               ir_rvalue *converted
+                  = convert_component(actual, formal->type);
+               actual->replace_with(converted);
+               break;
+            }
+            case ir_var_out:
+               if (actual->type != formal->type) {
+                  /* To convert an out parameter, we need to create a
+                   * temporary variable to hold the value before conversion,
+                   * and then perform the conversion after the function call
+                   * returns.
+                   *
+                   * This has the effect of transforming code like this:
+                   *
+                   *   void f(out int x);
+                   *   float value;
+                   *   f(value);
+                   *
+                   * Into IR that's equivalent to this:
+                   *
+                   *   void f(out int x);
+                   *   float value;
+                   *   int out_parameter_conversion;
+                   *   f(out_parameter_conversion);
+                   *   value = float(out_parameter_conversion);
+                   */
+                  ir_variable *tmp =
+                     new(ctx) ir_variable(formal->type,
+                                          "out_parameter_conversion",
+                                          ir_var_temporary);
+                  instructions->push_tail(tmp);
+                  ir_dereference_variable *deref_tmp_1
+                     = new(ctx) ir_dereference_variable(tmp);
+                  ir_dereference_variable *deref_tmp_2
+                     = new(ctx) ir_dereference_variable(tmp);
+                  ir_rvalue *converted_tmp
+                     = convert_component(deref_tmp_1, actual->type);
+                  ir_assignment *assignment
+                     = new(ctx) ir_assignment(actual, converted_tmp);
+                  post_call_conversions.push_tail(assignment);
+                  actual->replace_with(deref_tmp_2);
+               }
+               break;
+            case ir_var_inout:
+               /* Inout parameters should never require conversion, since that
+                * would require an implicit conversion to exist both to and
+                * from the formal parameter type, and there are no
+                * bidirectional implicit conversions.
+                */
+               assert (actual->type == formal->type);
+               break;
+            default:
+               assert (!"Illegal formal parameter mode");
+               break;
+            }
 	 }
 
 	 actual_iter.next();
@@ -196,8 +259,11 @@ match_function_by_name(exec_list *instructions, const char *name,
       /* Always insert the call in the instruction stream, and return a deref
        * of its return val if it returns a value, since we don't know if
        * the rvalue is going to be assigned to anything or not.
+       *
+       * Also insert any out parameter conversions after the call.
        */
       ir_call *call = new(ctx) ir_call(sig, actual_parameters);
+      ir_dereference_variable *deref;
       if (!sig->return_type->is_void()) {
          /* If the function call is a constant expression, don't
           * generate the instructions to call it; just generate an
@@ -214,7 +280,6 @@ match_function_by_name(exec_list *instructions, const char *name,
          }
 
 	 ir_variable *var;
-	 ir_dereference_variable *deref;
 
 	 var = new(ctx) ir_variable(sig->return_type,
 				    ralloc_asprintf(ctx, "%s_retval",
@@ -227,11 +292,12 @@ match_function_by_name(exec_list *instructions, const char *name,
 	 instructions->push_tail(assign);
 
 	 deref = new(ctx) ir_dereference_variable(var);
-	 return deref;
       } else {
 	 instructions->push_tail(call);
-	 return NULL;
+	 deref = NULL;
       }
+      instructions->append_list(&post_call_conversions);
+      return deref;
    } else {
       char *str = prototype_string(NULL, name, actual_parameters);
 

From a52b53b56e2b5d5853345d8bcd2a4ff50e495c20 Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Tue, 2 Aug 2011 15:22:25 -0700
Subject: [PATCH 301/600] glsl: Make is_lvalue() and variable_referenced()
 const.

These functions don't modify the target instruction, so it makes sense
to make them const.  This allows these functions to be called from ir
validation code (which uses const to ensure that it doesn't
accidentally modify the IR being validated).

Reviewed-by: Chad Versace <chad@chad-versace.us>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/ir.cpp |  4 ++--
 src/glsl/ir.h   | 18 +++++++++---------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 827fe8e17a7..6f8676ecceb 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -1096,7 +1096,7 @@ ir_dereference_record::ir_dereference_record(ir_variable *var,
 }
 
 bool
-ir_dereference::is_lvalue()
+ir_dereference::is_lvalue() const
 {
    ir_variable *var = this->variable_referenced();
 
@@ -1310,7 +1310,7 @@ ir_swizzle::create(ir_rvalue *val, const char *str, unsigned vector_length)
 #undef I
 
 ir_variable *
-ir_swizzle::variable_referenced()
+ir_swizzle::variable_referenced() const
 {
    return this->val->variable_referenced();
 }
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 50a9d6e1991..04fa97bf56f 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -144,7 +144,7 @@ public:
 
    ir_rvalue *as_rvalue_to_saturate();
 
-   virtual bool is_lvalue()
+   virtual bool is_lvalue() const
    {
       return false;
    }
@@ -152,7 +152,7 @@ public:
    /**
     * Get the variable that is ultimately referenced by an r-value
     */
-   virtual ir_variable *variable_referenced()
+   virtual ir_variable *variable_referenced() const
    {
       return NULL;
    }
@@ -1355,7 +1355,7 @@ public:
 
    virtual ir_visitor_status accept(ir_hierarchical_visitor *);
 
-   bool is_lvalue()
+   bool is_lvalue() const
    {
       return val->is_lvalue() && !mask.has_duplicates;
    }
@@ -1363,7 +1363,7 @@ public:
    /**
     * Get the variable that is ultimately referenced by an r-value
     */
-   virtual ir_variable *variable_referenced();
+   virtual ir_variable *variable_referenced() const;
 
    ir_rvalue *val;
    ir_swizzle_mask mask;
@@ -1387,12 +1387,12 @@ public:
       return this;
    }
 
-   bool is_lvalue();
+   bool is_lvalue() const;
 
    /**
     * Get the variable that is ultimately referenced by an r-value
     */
-   virtual ir_variable *variable_referenced() = 0;
+   virtual ir_variable *variable_referenced() const = 0;
 };
 
 
@@ -1413,7 +1413,7 @@ public:
    /**
     * Get the variable that is ultimately referenced by an r-value
     */
-   virtual ir_variable *variable_referenced()
+   virtual ir_variable *variable_referenced() const
    {
       return this->var;
    }
@@ -1462,7 +1462,7 @@ public:
    /**
     * Get the variable that is ultimately referenced by an r-value
     */
-   virtual ir_variable *variable_referenced()
+   virtual ir_variable *variable_referenced() const
    {
       return this->array->variable_referenced();
    }
@@ -1496,7 +1496,7 @@ public:
    /**
     * Get the variable that is ultimately referenced by an r-value
     */
-   virtual ir_variable *variable_referenced()
+   virtual ir_variable *variable_referenced() const
    {
       return this->record->variable_referenced();
    }

From 303e05cc249df3baeb3ed7654b0de00e7b9358fc Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Tue, 2 Aug 2011 15:44:39 -0700
Subject: [PATCH 302/600] glsl: Add validations for ir_call.

This patch extends ir_validate.cpp to check the following
characteristics of each ir_call:

- The number of actual parameters must match the number of formal
  parameters in the signature.

- The type of each actual parameter must match the type of the
  corresponding formal parameter in the signature.

- Each "out" or "inout" actual parameter must be an lvalue.

Reviewed-by: Chad Versace <chad@chad-versace.us>
---
 src/glsl/ir_validate.cpp | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index f3fceb2a57d..b3ca72ef0c2 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -541,7 +541,43 @@ ir_validate::visit_enter(ir_call *ir)
       abort();
    }
 
+   const exec_node *formal_param_node = callee->parameters.head;
+   const exec_node *actual_param_node = ir->actual_parameters.head;
+   while (true) {
+      if (formal_param_node->is_tail_sentinel()
+          != actual_param_node->is_tail_sentinel()) {
+         printf("ir_call has the wrong number of parameters:\n");
+         goto dump_ir;
+      }
+      if (formal_param_node->is_tail_sentinel()) {
+         break;
+      }
+      const ir_variable *formal_param
+         = (const ir_variable *) formal_param_node;
+      const ir_rvalue *actual_param
+         = (const ir_rvalue *) actual_param_node;
+      if (formal_param->type != actual_param->type) {
+         printf("ir_call parameter type mismatch:\n");
+         goto dump_ir;
+      }
+      if (formal_param->mode == ir_var_out
+          || formal_param->mode == ir_var_inout) {
+         if (!actual_param->is_lvalue()) {
+            printf("ir_call out/inout parameters must be lvalues:\n");
+            goto dump_ir;
+         }
+      }
+      formal_param_node = formal_param_node->next;
+      actual_param_node = actual_param_node->next;
+   }
+
    return visit_continue;
+
+dump_ir:
+   ir->print();
+   printf("callee:\n");
+   callee->print();
+   abort();
 }
 
 void

From 5880a9a4a7247e4c31df606bef089c45b4052aaa Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 3 Aug 2011 16:36:42 -0700
Subject: [PATCH 303/600] radeon: Explain to the user what went wrong when
 built without libdrm.

Before this commit, even LIBGL_DEBUG=verbose would just fail with:
libGL error: failed to create dri screen
---
 src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
index 607b7470d4b..a74c6c7a575 100644
--- a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
+++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
@@ -78,6 +78,9 @@ static inline uint32_t radeon_gem_name_bo(struct radeon_bo *dummy)
 
 static inline void *radeon_bo_manager_gem_ctor(int fd)
 {
+  fprintf(stderr, "[%s:%u] Mesa built without Radeon libdrm support.\n",
+	  __func__, __LINE__);
+
   return NULL;
 }
 
@@ -87,6 +90,9 @@ static inline void radeon_bo_manager_gem_dtor(void *dummy)
 
 static inline void *radeon_cs_manager_gem_ctor(int fd)
 {
+  fprintf(stderr, "[%s:%u] Mesa built without Radeon libdrm support.\n",
+	  __func__, __LINE__);
+
   return NULL;
 }
 

From a313c29c777de0bc74c252f1d35eef7d3d907e1f Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 21:40:50 -0700
Subject: [PATCH 304/600] glsl: When assigning to a whole array, mark the array
 as accessed.

The vs-varying-array-mat2-col-row-wr test writes a mat2[3] constant to
a mat2[3] varying out array, and also statically accesses element 1 of
it on the VS and FS sides.  At link time it would get trimmed down to
just 2 elements, and then codegen of the VS would end up generating
assignments to the unallocated last entry of the array.  On the new
i965 VS backend, that happened to land on the vertex position.

Some issues remain in this test on softpipe, i965/old-vs and
i965/new-vs on visual inspection, but i965 is passing because only one
green pixel is probed, not the whole split green/red quad.
---
 src/glsl/ast_to_hir.cpp | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 2025911acd3..9e7496b4b43 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -653,6 +653,16 @@ validate_assignment(struct _mesa_glsl_parse_state *state,
    return NULL;
 }
 
+static void
+mark_whole_array_access(ir_rvalue *access)
+{
+   ir_dereference_variable *deref = access->as_dereference_variable();
+
+   if (deref && deref->var) {
+      deref->var->max_array_access = deref->type->length - 1;
+   }
+}
+
 ir_rvalue *
 do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
 	      ir_rvalue *lhs, ir_rvalue *rhs, bool is_initializer,
@@ -713,6 +723,7 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
 						   rhs->type->array_size());
 	 d->type = var->type;
       }
+      mark_whole_array_access(lhs);
    }
 
    /* Most callers of do_assignment (assign, add_assign, pre_inc/dec,
@@ -773,16 +784,6 @@ ast_node::hir(exec_list *instructions,
    return NULL;
 }
 
-static void
-mark_whole_array_access(ir_rvalue *access)
-{
-   ir_dereference_variable *deref = access->as_dereference_variable();
-
-   if (deref) {
-      deref->var->max_array_access = deref->type->length - 1;
-   }
-}
-
 static ir_rvalue *
 do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1)
 {

From 9b784069ce76e1914eaafac0542458f6a84d9fc0 Mon Sep 17 00:00:00 2001
From: Cooper Yuan <cooperyuan@gmail.com>
Date: Tue, 16 Aug 2011 09:32:10 +0800
Subject: [PATCH 305/600] dri2: add code to dri2_Flush extension.

It's going to flush client's commands in eglWaitClient(). Before this,
egl applications using pixmap or pbuffer flicker because of no flush.

Reviewed-by: Alan Hourihane
---
 src/gallium/state_trackers/dri/drm/dri2.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c
index d491e46ab16..908a735234e 100644
--- a/src/gallium/state_trackers/dri/drm/dri2.c
+++ b/src/gallium/state_trackers/dri/drm/dri2.c
@@ -46,6 +46,10 @@
 static void
 dri2_flush_drawable(__DRIdrawable *draw)
 {
+   struct dri_drawable *drawable = dri_drawable(draw);
+   struct dri_context *ctx = dri_get_current(draw->driScreenPriv);
+
+   ctx->st->flush(ctx->st, 0, NULL);
 }
 
 static void

From 8c409403217cf8c13e1d2dd306ad5e86b566c5c9 Mon Sep 17 00:00:00 2001
From: Benjamin Franzke <benjaminfranzke@googlemail.com>
Date: Mon, 15 Aug 2011 09:50:19 +0200
Subject: [PATCH 306/600] dri2: Add __DRI_BUFFER_COUNT token

Remove definition from egl_dri2.
Defining this is egl_dri2.h breaks as soon as
a new dri2 buffer token is added like with commit
4501a5d6e8d00fd0d87625352ed5ba1a8861f72e.
---
 include/GL/internal/dri_interface.h | 3 +++
 src/egl/drivers/dri2/egl_dri2.h     | 2 --
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h
index 4fe9e943b55..eed159e11cd 100644
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -694,6 +694,9 @@ struct __DRIswrastExtensionRec {
 #define __DRI_BUFFER_DEPTH_STENCIL	9  /**< Only available with DRI2 1.1 */
 #define __DRI_BUFFER_HIZ		10
 
+/* Inofficial and for internal use. Increase when adding a new buffer token. */
+#define __DRI_BUFFER_COUNT		11
+
 struct __DRIbufferRec {
     unsigned int attachment;
     unsigned int name;
diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h
index a7297188af2..db93eec14ba 100644
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -123,8 +123,6 @@ enum wayland_buffer_type {
    WL_BUFFER_THIRD,
    WL_BUFFER_COUNT
 };
-
-#define __DRI_BUFFER_COUNT 10
 #endif
 
 enum dri2_surface_type {

From ce12f826927cf2d3ac3fd70d893abfb07adc23db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Fri, 22 Jul 2011 19:25:07 +0200
Subject: [PATCH 307/600] r600g: first step into winsys/radeon

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 configure.ac                                  |  2 +-
 src/gallium/drivers/r600/r600_pipe.c          |  4 +-
 src/gallium/drivers/r600/r600_pipe.h          |  4 +-
 src/gallium/drivers/r600/r600_public.h        |  4 +-
 src/gallium/targets/dri-r600/Makefile         |  1 +
 src/gallium/targets/dri-r600/target.c         |  6 +--
 src/gallium/targets/egl-static/Makefile       |  3 ++
 src/gallium/targets/egl-static/egl_pipe.c     |  5 +-
 src/gallium/targets/gbm/Makefile              | 10 +++-
 src/gallium/targets/gbm/pipe_r600.c           |  7 ++-
 src/gallium/targets/va-r600/Makefile          |  1 +
 src/gallium/targets/va-r600/target.c          |  6 +--
 src/gallium/targets/vdpau-r600/Makefile       |  1 +
 src/gallium/targets/vdpau-r600/target.c       |  6 +--
 src/gallium/targets/xvmc-r600/Makefile        |  1 +
 src/gallium/targets/xvmc-r600/target.c        |  6 +--
 src/gallium/winsys/r600/drm/r600_drm.c        | 50 ++++---------------
 src/gallium/winsys/r600/drm/r600_drm_public.h |  4 +-
 src/gallium/winsys/r600/drm/r600_priv.h       |  2 +
 .../winsys/radeon/drm/radeon_drm_winsys.c     | 35 ++++++++++---
 .../winsys/radeon/drm/radeon_drm_winsys.h     |  6 +++
 src/gallium/winsys/radeon/drm/radeon_winsys.h |  3 ++
 22 files changed, 93 insertions(+), 74 deletions(-)

diff --git a/configure.ac b/configure.ac
index 6fa5e5177a4..ea58dae6593 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1855,7 +1855,7 @@ if test "x$with_gallium_drivers" != x; then
             ;;
         xr600)
             GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r600"
-            gallium_check_st "r600/drm" "dri-r600" "" "" "xvmc-r600" "vdpau-r600" "va-r600"
+            gallium_check_st "r600/drm radeon/drm" "dri-r600" "" "" "xvmc-r600" "vdpau-r600" "va-r600"
             ;;
         xnouveau)
             PKG_CHECK_MODULES([NOUVEAU], [libdrm_nouveau >= $LIBDRM_NOUVEAU_REQUIRED])
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 461f59439e8..6181e8b3202 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -47,6 +47,7 @@
 #include "r600_resource.h"
 #include "r600_shader.h"
 #include "r600_pipe.h"
+#include "../../winsys/r600/drm/r600_drm_public.h"
 
 /*
  * pipe_context
@@ -563,9 +564,10 @@ static boolean r600_fence_finish(struct pipe_screen *pscreen,
 	return TRUE;
 }
 
-struct pipe_screen *r600_screen_create(struct radeon *radeon)
+struct pipe_screen *r600_screen_create(struct radeon_winsys *rw)
 {
 	struct r600_screen *rscreen;
+	struct radeon *radeon = r600_drm_winsys_create(rw);
 
 	rscreen = CALLOC_STRUCT(r600_screen);
 	if (rscreen == NULL) {
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 6f399ed43b0..c53a191594b 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -26,6 +26,8 @@
 #ifndef R600_PIPE_H
 #define R600_PIPE_H
 
+#include "../../winsys/radeon/drm/radeon_winsys.h"
+
 #include <pipe/p_state.h>
 #include <pipe/p_screen.h>
 #include <pipe/p_context.h>
@@ -183,7 +185,7 @@ struct r600_pipe_context {
 	struct r600_pipe_state		*states[R600_PIPE_NSTATES];
 	struct r600_context		ctx;
 	struct r600_vertex_element	*vertex_elements;
-	struct r600_pipe_resource_state		fs_resource[PIPE_MAX_ATTRIBS];
+	struct r600_pipe_resource_state	fs_resource[PIPE_MAX_ATTRIBS];
 	struct pipe_framebuffer_state	framebuffer;
 	struct pipe_index_buffer	index_buffer;
 	unsigned			cb_target_mask;
diff --git a/src/gallium/drivers/r600/r600_public.h b/src/gallium/drivers/r600/r600_public.h
index f1970201e89..1c82a7af68f 100644
--- a/src/gallium/drivers/r600/r600_public.h
+++ b/src/gallium/drivers/r600/r600_public.h
@@ -23,6 +23,8 @@
 #ifndef R600_PUBLIC_H
 #define R600_PUBLIC_H
 
-struct pipe_screen *r600_screen_create(struct radeon *radeon);
+struct radeon_winsys;
+
+struct pipe_screen *r600_screen_create(struct radeon_winsys *rw);
 
 #endif
diff --git a/src/gallium/targets/dri-r600/Makefile b/src/gallium/targets/dri-r600/Makefile
index 0c4de203d35..2d7463008fe 100644
--- a/src/gallium/targets/dri-r600/Makefile
+++ b/src/gallium/targets/dri-r600/Makefile
@@ -7,6 +7,7 @@ PIPE_DRIVERS = \
 	$(TOP)/src/gallium/drivers/r600/libr600.a \
 	$(TOP)/src/gallium/state_trackers/dri/drm/libdridrm.a \
 	$(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
+	$(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
 	$(TOP)/src/gallium/drivers/trace/libtrace.a \
 	$(TOP)/src/gallium/drivers/rbug/librbug.a \
 	$(TOP)/src/gallium/drivers/noop/libnoop.a
diff --git a/src/gallium/targets/dri-r600/target.c b/src/gallium/targets/dri-r600/target.c
index 8753e2bab17..1b8b6816ec1 100644
--- a/src/gallium/targets/dri-r600/target.c
+++ b/src/gallium/targets/dri-r600/target.c
@@ -1,14 +1,14 @@
 #include "state_tracker/drm_driver.h"
 #include "target-helpers/inline_debug_helper.h"
-#include "r600/drm/r600_drm_public.h"
+#include "radeon/drm/radeon_drm_public.h"
 #include "r600/r600_public.h"
 
 static struct pipe_screen *create_screen(int fd)
 {
-   struct radeon *radeon;
+   struct radeon_winsys *radeon;
    struct pipe_screen *screen;
 
-   radeon = r600_drm_winsys_create(fd);
+   radeon = radeon_drm_winsys_create(fd);
    if (!radeon)
       return NULL;
 
diff --git a/src/gallium/targets/egl-static/Makefile b/src/gallium/targets/egl-static/Makefile
index 42d34b8eda1..1583ab181ea 100644
--- a/src/gallium/targets/egl-static/Makefile
+++ b/src/gallium/targets/egl-static/Makefile
@@ -117,17 +117,20 @@ endif
 
 # r300
 ifneq ($(findstring radeon/drm,$(GALLIUM_WINSYS_DIRS)),)
+ifneq ($(findstring r300,$(GALLIUM_DRIVERS_DIRS)),)
 egl_CPPFLAGS += -D_EGL_PIPE_R300=1
 egl_LIBS += \
 	$(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
 	$(TOP)/src/gallium/drivers/r300/libr300.a
 endif
+endif
 
 # r600
 ifneq ($(findstring r600/drm,$(GALLIUM_WINSYS_DIRS)),)
 egl_CPPFLAGS += -D_EGL_PIPE_R600=1
 egl_LIBS += \
 	$(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
+	$(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
 	$(TOP)/src/gallium/drivers/r600/libr600.a
 endif
 
diff --git a/src/gallium/targets/egl-static/egl_pipe.c b/src/gallium/targets/egl-static/egl_pipe.c
index 658c532b404..f2b50bd0eab 100644
--- a/src/gallium/targets/egl-static/egl_pipe.c
+++ b/src/gallium/targets/egl-static/egl_pipe.c
@@ -42,7 +42,6 @@
 #include "radeon/drm/radeon_drm_public.h"
 #include "r300/r300_public.h"
 /* for r600 */
-#include "r600/drm/r600_drm_public.h"
 #include "r600/r600_public.h"
 /* for vmwgfx */
 #include "svga/drm/svga_drm_public.h"
@@ -141,10 +140,10 @@ static struct pipe_screen *
 pipe_r600_create_screen(int fd)
 {
 #if _EGL_PIPE_R600
-   struct radeon *rw;
+   struct radeon_winsys *rw;
    struct pipe_screen *screen;
 
-   rw = r600_drm_winsys_create(fd);
+   rw = radeon_drm_winsys_create(fd);
    if (!rw)
       return NULL;
 
diff --git a/src/gallium/targets/gbm/Makefile b/src/gallium/targets/gbm/Makefile
index faacc89f1a0..033a1acaaf9 100644
--- a/src/gallium/targets/gbm/Makefile
+++ b/src/gallium/targets/gbm/Makefile
@@ -83,6 +83,7 @@ r300_LIBS = \
 # r600 pipe driver
 r600_LIBS = \
 	$(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
+	$(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
 	$(TOP)/src/gallium/drivers/r600/libr600.a
 
 # vmwgfx pipe driver
@@ -90,13 +91,18 @@ vmwgfx_LIBS = \
 	$(TOP)/src/gallium/winsys/svga/drm/libsvgadrm.a \
 	$(TOP)/src/gallium/drivers/svga/libsvga.a
 
+
+
 # LLVM
 ifeq ($(MESA_LLVM),1)
-pipe_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a
 pipe_SYS += $(LLVM_LIBS)
 pipe_LDFLAGS += $(LLVM_LDFLAGS)
 endif
 
+ifneq ($(findstring llvmpipe,$(GALLIUM_DRIVERS_DIRS)),)
+pipe_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a
+endif
+
 # determine the targets/sources
 pipe_TARGETS =
 pipe_SOURCES =
@@ -117,9 +123,11 @@ pipe_SOURCES += pipe_nouveau.c
 endif
 
 ifneq ($(findstring radeon/drm,$(GALLIUM_WINSYS_DIRS)),)
+ifneq ($(findstring r300,$(GALLIUM_DRIVERS_DIRS)),)
 pipe_TARGETS += $(PIPE_PREFIX)r300.so
 pipe_SOURCES += pipe_r300.c
 endif
+endif
 
 ifneq ($(findstring r600/drm,$(GALLIUM_WINSYS_DIRS)),)
 pipe_TARGETS += $(PIPE_PREFIX)r600.so
diff --git a/src/gallium/targets/gbm/pipe_r600.c b/src/gallium/targets/gbm/pipe_r600.c
index 486a6592585..9f61a51404a 100644
--- a/src/gallium/targets/gbm/pipe_r600.c
+++ b/src/gallium/targets/gbm/pipe_r600.c
@@ -1,16 +1,15 @@
-
 #include "state_tracker/drm_driver.h"
 #include "target-helpers/inline_debug_helper.h"
-#include "r600/drm/r600_drm_public.h"
+#include "radeon/drm/radeon_drm_public.h"
 #include "r600/r600_public.h"
 
 static struct pipe_screen *
 create_screen(int fd)
 {
-   struct radeon *rw;
+   struct radeon_winsys *rw;
    struct pipe_screen *screen;
 
-   rw = r600_drm_winsys_create(fd);
+   rw = radeon_drm_winsys_create(fd);
    if (!rw)
       return NULL;
 
diff --git a/src/gallium/targets/va-r600/Makefile b/src/gallium/targets/va-r600/Makefile
index 28797ad528d..d09a3aa8ad2 100644
--- a/src/gallium/targets/va-r600/Makefile
+++ b/src/gallium/targets/va-r600/Makefile
@@ -10,6 +10,7 @@ PIPE_DRIVERS = \
 	$(TOP)/src/gallium/drivers/r600/libr600.a \
 	$(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \
         $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
+	$(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
 	$(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
         $(TOP)/src/gallium/drivers/trace/libtrace.a \
 	$(TOP)/src/gallium/auxiliary/libgallium.a
diff --git a/src/gallium/targets/va-r600/target.c b/src/gallium/targets/va-r600/target.c
index 8753e2bab17..1b8b6816ec1 100644
--- a/src/gallium/targets/va-r600/target.c
+++ b/src/gallium/targets/va-r600/target.c
@@ -1,14 +1,14 @@
 #include "state_tracker/drm_driver.h"
 #include "target-helpers/inline_debug_helper.h"
-#include "r600/drm/r600_drm_public.h"
+#include "radeon/drm/radeon_drm_public.h"
 #include "r600/r600_public.h"
 
 static struct pipe_screen *create_screen(int fd)
 {
-   struct radeon *radeon;
+   struct radeon_winsys *radeon;
    struct pipe_screen *screen;
 
-   radeon = r600_drm_winsys_create(fd);
+   radeon = radeon_drm_winsys_create(fd);
    if (!radeon)
       return NULL;
 
diff --git a/src/gallium/targets/vdpau-r600/Makefile b/src/gallium/targets/vdpau-r600/Makefile
index 0fd817b8e82..c2d95af295a 100644
--- a/src/gallium/targets/vdpau-r600/Makefile
+++ b/src/gallium/targets/vdpau-r600/Makefile
@@ -7,6 +7,7 @@ PIPE_DRIVERS = \
         $(TOP)/src/gallium/drivers/r600/libr600.a \
 	$(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \
         $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
+	$(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
         $(TOP)/src/gallium/drivers/trace/libtrace.a \
 	$(TOP)/src/gallium/auxiliary/libgallium.a
 
diff --git a/src/gallium/targets/vdpau-r600/target.c b/src/gallium/targets/vdpau-r600/target.c
index 8753e2bab17..1b8b6816ec1 100644
--- a/src/gallium/targets/vdpau-r600/target.c
+++ b/src/gallium/targets/vdpau-r600/target.c
@@ -1,14 +1,14 @@
 #include "state_tracker/drm_driver.h"
 #include "target-helpers/inline_debug_helper.h"
-#include "r600/drm/r600_drm_public.h"
+#include "radeon/drm/radeon_drm_public.h"
 #include "r600/r600_public.h"
 
 static struct pipe_screen *create_screen(int fd)
 {
-   struct radeon *radeon;
+   struct radeon_winsys *radeon;
    struct pipe_screen *screen;
 
-   radeon = r600_drm_winsys_create(fd);
+   radeon = radeon_drm_winsys_create(fd);
    if (!radeon)
       return NULL;
 
diff --git a/src/gallium/targets/xvmc-r600/Makefile b/src/gallium/targets/xvmc-r600/Makefile
index 0bb72f1eff9..a10a42d18ed 100644
--- a/src/gallium/targets/xvmc-r600/Makefile
+++ b/src/gallium/targets/xvmc-r600/Makefile
@@ -7,6 +7,7 @@ PIPE_DRIVERS = \
         $(TOP)/src/gallium/drivers/r600/libr600.a \
 	$(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \
         $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
+	$(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
         $(TOP)/src/gallium/drivers/trace/libtrace.a \
 	$(TOP)/src/gallium/auxiliary/libgallium.a
 
diff --git a/src/gallium/targets/xvmc-r600/target.c b/src/gallium/targets/xvmc-r600/target.c
index 8753e2bab17..1b8b6816ec1 100644
--- a/src/gallium/targets/xvmc-r600/target.c
+++ b/src/gallium/targets/xvmc-r600/target.c
@@ -1,14 +1,14 @@
 #include "state_tracker/drm_driver.h"
 #include "target-helpers/inline_debug_helper.h"
-#include "r600/drm/r600_drm_public.h"
+#include "radeon/drm/radeon_drm_public.h"
 #include "r600/r600_public.h"
 
 static struct pipe_screen *create_screen(int fd)
 {
-   struct radeon *radeon;
+   struct radeon_winsys *radeon;
    struct pipe_screen *screen;
 
-   radeon = r600_drm_winsys_create(fd);
+   radeon = radeon_drm_winsys_create(fd);
    if (!radeon)
       return NULL;
 
diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c
index 8aa8c3df52a..325547ab4ec 100644
--- a/src/gallium/winsys/r600/drm/r600_drm.c
+++ b/src/gallium/winsys/r600/drm/r600_drm.c
@@ -37,6 +37,7 @@
 #include "r600_drm_public.h"
 #include "xf86drm.h"
 #include "radeon_drm.h"
+#include "../../radeon/drm/radeon_winsys.h"
 
 #ifndef RADEON_INFO_TILING_CONFIG
 #define RADEON_INFO_TILING_CONFIG 0x6
@@ -98,20 +99,6 @@ unsigned r600_get_minor_version(struct radeon *radeon)
 	return radeon->minor_version;
 }
 
-
-static int radeon_get_device(struct radeon *radeon)
-{
-	struct drm_radeon_info info = {};
-	int r;
-
-	radeon->device = 0;
-	info.request = RADEON_INFO_DEVICE_ID;
-	info.value = (uintptr_t)&radeon->device;
-	r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info,
-			sizeof(struct drm_radeon_info));
-	return r;
-}
-
 static int r600_interpret_tiling(struct radeon *radeon, uint32_t tiling_config)
 {
 	switch ((tiling_config & 0xe) >> 1) {
@@ -320,39 +307,22 @@ static int handle_compare(void *key1, void *key2)
     return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
 }
 
-static struct radeon *radeon_new(int fd, unsigned device)
+static struct radeon *radeon_new(struct radeon_winsys *rw)
 {
 	struct radeon *radeon;
 	int r;
-	drmVersionPtr version;
 
 	radeon = calloc(1, sizeof(*radeon));
 	if (radeon == NULL) {
 		return NULL;
 	}
-	radeon->fd = fd;
-	radeon->device = device;
+
+	rw->query_info(rw, &radeon->info);
+	radeon->fd = radeon->info.fd;
+	radeon->device = radeon->info.pci_id;
+	radeon->num_backends = radeon->info.r600_num_backends;
 	radeon->refcount = 1;
-
-	version = drmGetVersion(radeon->fd);
-	if (version->version_major != 2) {
-		fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is "
-			"only compatible with 2.x.x\n", __FUNCTION__,
-			version->version_major, version->version_minor,
-			version->version_patchlevel);
-		drmFreeVersion(version);
-		exit(1);
-	}
-
-	radeon->minor_version = version->version_minor;
-
-	drmFreeVersion(version);
-
-	r = radeon_get_device(radeon);
-	if (r) {
-		fprintf(stderr, "Failed to get device id\n");
-		return radeon_decref(radeon);
-	}
+	radeon->minor_version = radeon->info.drm_minor;
 
 	radeon->family = radeon_family_from_device(radeon->device);
 	if (radeon->family == CHIP_UNKNOWN) {
@@ -436,9 +406,9 @@ static struct radeon *radeon_new(int fd, unsigned device)
 	return radeon;
 }
 
-struct radeon *r600_drm_winsys_create(int drmfd)
+struct radeon *r600_drm_winsys_create(struct radeon_winsys *rw)
 {
-	return radeon_new(drmfd, 0);
+	return radeon_new(rw);
 }
 
 struct radeon *radeon_decref(struct radeon *radeon)
diff --git a/src/gallium/winsys/r600/drm/r600_drm_public.h b/src/gallium/winsys/r600/drm/r600_drm_public.h
index cfce8df9c2c..1d990f91013 100644
--- a/src/gallium/winsys/r600/drm/r600_drm_public.h
+++ b/src/gallium/winsys/r600/drm/r600_drm_public.h
@@ -26,8 +26,8 @@
 #ifndef R600_DRM_PUBLIC_H
 #define R600_DRM_PUBLIC_H
 
-struct radeon;
+struct radeon_winsys;
 
-struct radeon *r600_drm_winsys_create(int drmFD);
+struct radeon *r600_drm_winsys_create(struct radeon_winsys *rw);
 
 #endif
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 75115fdaed7..7630b30b5f0 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -35,6 +35,7 @@
 #include "util/u_hash_table.h"
 #include <os/os_thread.h>
 #include "r600.h"
+#include "../../radeon/drm/radeon_winsys.h"
 
 #define PKT_COUNT_C                     0xC000FFFF
 #define PKT_COUNT_S(x)                  (((x) & 0x3FFF) << 16)
@@ -43,6 +44,7 @@ struct r600_bomgr;
 struct r600_bo;
 
 struct radeon {
+	struct radeon_info		info;
 	int				fd;
 	int				refcount;
 	unsigned			device;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 5983e86c570..faeb66c8908 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -47,6 +47,9 @@
 #ifndef RADEON_INFO_WANT_CMASK
 #define RADEON_INFO_WANT_CMASK 8
 #endif
+#ifndef RADEON_INFO_NUM_BACKENDS
+#define RADEON_INFO_NUM_BACKENDS 10
+#endif
 
 /* Enable/disable feature access for one command stream.
  * If enable == TRUE, return TRUE on success.
@@ -175,6 +178,13 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
 #define CHIPSET(pci_id, name, family) case pci_id:
 #include "pci_ids/r300_pci_ids.h"
 #undef CHIPSET
+        ws->gen = R300;
+        break;
+
+#define CHIPSET(pci_id, name, family) case pci_id:
+#include "pci_ids/r600_pci_ids.h"
+#undef CHIPSET
+        ws->gen = R600;
         break;
 
     default:
@@ -196,15 +206,23 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
     ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
 
     /* Generation-specific queries. */
-    if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_GB_PIPES,
-                              "GB pipe count",
-                              &ws->info.r300_num_gb_pipes))
-        return FALSE;
+    if (ws->gen == R300) {
+        if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_GB_PIPES,
+                                  "GB pipe count",
+                                  &ws->info.r300_num_gb_pipes))
+            return FALSE;
 
-    if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_Z_PIPES,
-                              "Z pipe count",
-                              &ws->info.r300_num_z_pipes))
-        return FALSE;
+        if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_Z_PIPES,
+                                  "Z pipe count",
+                                  &ws->info.r300_num_z_pipes))
+            return FALSE;
+    }
+    else if (ws->gen == R600) {
+        if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS,
+                                  "num backends",
+                                  &ws->info.r600_num_backends))
+            return FALSE;
+    }
 
     return TRUE;
 }
@@ -263,6 +281,7 @@ struct radeon_winsys *radeon_drm_winsys_create(int fd)
     }
 
     ws->fd = fd;
+    ws->info.fd = fd;
 
     if (!do_winsys_init(ws))
         goto fail;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
index 347e1f1d11a..69216448496 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
@@ -33,12 +33,18 @@
 #include "radeon_winsys.h"
 #include "os/os_thread.h"
 
+enum radeon_generation {
+    R300,
+    R600
+};
+
 struct radeon_drm_winsys {
     struct radeon_winsys base;
 
     int fd; /* DRM file descriptor */
     int num_cs; /* The number of command streams created. */
 
+    enum radeon_generation gen;
     struct radeon_info info;
 
     struct pb_manager *kman;
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index 6d52dc25022..2a49e615981 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -73,6 +73,7 @@ struct radeon_info {
     uint32_t pci_id;
     uint32_t gart_size;
     uint32_t vram_size;
+    uint32_t fd; /* XXX transitional */
 
     uint32_t drm_major; /* version */
     uint32_t drm_minor;
@@ -80,6 +81,8 @@ struct radeon_info {
 
     uint32_t r300_num_gb_pipes;
     uint32_t r300_num_z_pipes;
+
+    uint32_t r600_num_backends;
 };
 
 enum radeon_feature_id {

From fb8cf51eeb91413e761e0510d1f8c11b8cd0a7ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Fri, 22 Jul 2011 20:15:47 +0200
Subject: [PATCH 308/600] r600g: move some queries into winsys/radeon

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/winsys/r600/drm/r600_drm.c        | 91 +++----------------
 src/gallium/winsys/r600/drm/r600_hw_context.c |  2 +-
 src/gallium/winsys/r600/drm/r600_priv.h       |  6 --
 src/gallium/winsys/r600/drm/radeon_bo.c       | 18 ++--
 .../winsys/radeon/drm/radeon_drm_winsys.c     | 32 +++++--
 src/gallium/winsys/radeon/drm/radeon_winsys.h |  2 +
 6 files changed, 51 insertions(+), 100 deletions(-)

diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c
index 325547ab4ec..ab15257efb2 100644
--- a/src/gallium/winsys/r600/drm/r600_drm.c
+++ b/src/gallium/winsys/r600/drm/r600_drm.c
@@ -76,12 +76,12 @@ struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon)
 
 unsigned r600_get_clock_crystal_freq(struct radeon *radeon)
 {
-	return radeon->clock_crystal_freq;
+	return radeon->info.r600_clock_crystal_freq;
 }
 
 unsigned r600_get_num_backends(struct radeon *radeon)
 {
-	return radeon->num_backends;
+	return radeon->info.r600_num_backends;
 }
 
 unsigned r600_get_num_tile_pipes(struct radeon *radeon)
@@ -96,7 +96,7 @@ unsigned r600_get_backend_map(struct radeon *radeon)
 
 unsigned r600_get_minor_version(struct radeon *radeon)
 {
-	return radeon->minor_version;
+	return radeon->info.drm_minor;
 }
 
 static int r600_interpret_tiling(struct radeon *radeon, uint32_t tiling_config)
@@ -191,59 +191,16 @@ static int eg_interpret_tiling(struct radeon *radeon, uint32_t tiling_config)
 
 static int radeon_drm_get_tiling(struct radeon *radeon)
 {
-	struct drm_radeon_info info = {};
-	int r;
-	uint32_t tiling_config = 0;
+	uint32_t tiling_config = radeon->info.r600_tiling_config;
 
-	info.request = RADEON_INFO_TILING_CONFIG;
-	info.value = (uintptr_t)&tiling_config;
-	r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info,
-				sizeof(struct drm_radeon_info));
-
-	if (r)
+	if (!tiling_config)
 		return 0;
 
 	if (radeon->chip_class == R600 || radeon->chip_class == R700) {
-		r = r600_interpret_tiling(radeon, tiling_config);
+		return r600_interpret_tiling(radeon, tiling_config);
 	} else {
-		r = eg_interpret_tiling(radeon, tiling_config);
+		return eg_interpret_tiling(radeon, tiling_config);
 	}
-	return r;
-}
-
-static int radeon_get_clock_crystal_freq(struct radeon *radeon)
-{
-	struct drm_radeon_info info = {};
-	uint32_t clock_crystal_freq = 0;
-	int r;
-
-	info.request = RADEON_INFO_CLOCK_CRYSTAL_FREQ;
-	info.value = (uintptr_t)&clock_crystal_freq;
-	r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info,
-			sizeof(struct drm_radeon_info));
-	if (r)
-		return r;
-
-	radeon->clock_crystal_freq = clock_crystal_freq;
-	return 0;
-}
-
-
-static int radeon_get_num_backends(struct radeon *radeon)
-{
-	struct drm_radeon_info info = {};
-	uint32_t num_backends = 0;
-	int r;
-
-	info.request = RADEON_INFO_NUM_BACKENDS;
-	info.value = (uintptr_t)&num_backends;
-	r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info,
-			sizeof(struct drm_radeon_info));
-	if (r)
-		return r;
-
-	radeon->num_backends = num_backends;
-	return 0;
 }
 
 static int radeon_get_num_tile_pipes(struct radeon *radeon)
@@ -254,7 +211,7 @@ static int radeon_get_num_tile_pipes(struct radeon *radeon)
 
 	info.request = RADEON_INFO_NUM_TILE_PIPES;
 	info.value = (uintptr_t)&num_tile_pipes;
-	r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info,
+	r = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_INFO, &info,
 			sizeof(struct drm_radeon_info));
 	if (r)
 		return r;
@@ -271,7 +228,7 @@ static int radeon_get_backend_map(struct radeon *radeon)
 
 	info.request = RADEON_INFO_BACKEND_MAP;
 	info.value = (uintptr_t)&backend_map;
-	r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info,
+	r = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_INFO, &info,
 			sizeof(struct drm_radeon_info));
 	if (r)
 		return r;
@@ -282,7 +239,6 @@ static int radeon_get_backend_map(struct radeon *radeon)
 	return 0;
 }
 
-
 static int radeon_init_fence(struct radeon *radeon)
 {
 	radeon->fence = 1;
@@ -307,7 +263,7 @@ static int handle_compare(void *key1, void *key2)
     return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
 }
 
-static struct radeon *radeon_new(struct radeon_winsys *rw)
+struct radeon *r600_drm_winsys_create(struct radeon_winsys *rw)
 {
 	struct radeon *radeon;
 	int r;
@@ -318,15 +274,10 @@ static struct radeon *radeon_new(struct radeon_winsys *rw)
 	}
 
 	rw->query_info(rw, &radeon->info);
-	radeon->fd = radeon->info.fd;
-	radeon->device = radeon->info.pci_id;
-	radeon->num_backends = radeon->info.r600_num_backends;
-	radeon->refcount = 1;
-	radeon->minor_version = radeon->info.drm_minor;
 
-	radeon->family = radeon_family_from_device(radeon->device);
+	radeon->family = radeon_family_from_device(radeon->info.pci_id);
 	if (radeon->family == CHIP_UNKNOWN) {
-		fprintf(stderr, "Unknown chipset 0x%04X\n", radeon->device);
+		fprintf(stderr, "Unknown chipset 0x%04X\n", radeon->info.pci_id);
 		return radeon_decref(radeon);
 	}
 	/* setup class */
@@ -373,20 +324,14 @@ static struct radeon *radeon_new(struct radeon_winsys *rw)
 		break;
 	default:
 		fprintf(stderr, "%s unknown or unsupported chipset 0x%04X\n",
-			__func__, radeon->device);
+			__func__, radeon->info.pci_id);
 		break;
 	}
 
 	if (radeon_drm_get_tiling(radeon))
 		return NULL;
 
-	/* get the GPU counter frequency, failure is non fatal */
-	radeon_get_clock_crystal_freq(radeon);
-
-	if (radeon->minor_version >= 9)
-		radeon_get_num_backends(radeon);
-
-	if (radeon->minor_version >= 11) {
+	if (radeon->info.drm_minor >= 11) {
 		radeon_get_num_tile_pipes(radeon);
 		radeon_get_backend_map(radeon);
 	}
@@ -406,18 +351,10 @@ static struct radeon *radeon_new(struct radeon_winsys *rw)
 	return radeon;
 }
 
-struct radeon *r600_drm_winsys_create(struct radeon_winsys *rw)
-{
-	return radeon_new(rw);
-}
-
 struct radeon *radeon_decref(struct radeon *radeon)
 {
 	if (radeon == NULL)
 		return NULL;
-	if (--radeon->refcount > 0) {
-		return NULL;
-	}
 
 	util_hash_table_destroy(radeon->bo_handles);
 	pipe_mutex_destroy(radeon->bo_handles_mutex);
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index 30af4e8066f..46ca4ed907a 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -1621,7 +1621,7 @@ void r600_context_flush(struct r600_context *ctx)
 	chunks[1].chunk_data = (uint64_t)(uintptr_t)ctx->reloc;
 	chunk_array[0] = (uint64_t)(uintptr_t)&chunks[0];
 	chunk_array[1] = (uint64_t)(uintptr_t)&chunks[1];
-	r = drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib,
+	r = drmCommandWriteRead(ctx->radeon->info.fd, DRM_RADEON_CS, &drmib,
 				sizeof(struct drm_radeon_cs));
 	if (r) {
 		fprintf(stderr, "radeon: The kernel rejected CS, "
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 7630b30b5f0..9fc7c534646 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -45,9 +45,6 @@ struct r600_bo;
 
 struct radeon {
 	struct radeon_info		info;
-	int				fd;
-	int				refcount;
-	unsigned			device;
 	unsigned			family;
 	enum chip_class			chip_class;
 	struct r600_tiling_info		tiling_info;
@@ -55,12 +52,9 @@ struct radeon {
 	unsigned			fence;
 	unsigned			*cfence;
 	struct r600_bo			*fence_bo;
-	unsigned			clock_crystal_freq;
-	unsigned			num_backends;
 	unsigned			num_tile_pipes;
 	unsigned			backend_map;
 	boolean				backend_map_valid;
-	unsigned                        minor_version;
 
         /* List of buffer handles and its mutex. */
 	struct util_hash_table          *bo_handles;
diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c
index 45cf6f09671..34696da515b 100644
--- a/src/gallium/winsys/r600/drm/radeon_bo.c
+++ b/src/gallium/winsys/r600/drm/radeon_bo.c
@@ -44,14 +44,14 @@ int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo)
 	args.handle = bo->handle;
 	args.offset = 0;
 	args.size = (uint64_t)bo->size;
-	r = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_MMAP,
+	r = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_GEM_MMAP,
 				&args, sizeof(args));
 	if (r) {
 		fprintf(stderr, "error mapping %p 0x%08X (error = %d)\n",
 			bo, bo->handle, r);
 		return r;
 	}
-	ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, radeon->fd, args.addr_ptr);
+	ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, radeon->info.fd, args.addr_ptr);
 	if (ptr == MAP_FAILED) {
 		fprintf(stderr, "%s failed to map bo\n", __func__);
 		return -errno;
@@ -101,7 +101,7 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
 
 		memset(&open_arg, 0, sizeof(open_arg));
 		open_arg.name = handle;
-		r = drmIoctl(radeon->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
+		r = drmIoctl(radeon->info.fd, DRM_IOCTL_GEM_OPEN, &open_arg);
 		if (r != 0) {
 			free(bo);
 			return NULL;
@@ -118,7 +118,7 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
 		args.initial_domain = initial_domain;
 		args.flags = 0;
 		args.handle = 0;
-		r = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_CREATE,
+		r = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_GEM_CREATE,
 					&args, sizeof(args));
 		bo->handle = args.handle;
 		if (r) {
@@ -153,7 +153,7 @@ static void radeon_bo_destroy(struct radeon *radeon, struct radeon_bo *bo)
 	radeon_bo_fixed_unmap(radeon, bo);
 	memset(&args, 0, sizeof(args));
 	args.handle = bo->handle;
-	drmIoctl(radeon->fd, DRM_IOCTL_GEM_CLOSE, &args);
+	drmIoctl(radeon->info.fd, DRM_IOCTL_GEM_CLOSE, &args);
 	memset(bo, 0, sizeof(struct radeon_bo));
 	free(bo);
 }
@@ -188,7 +188,7 @@ int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo)
 	memset(&args, 0, sizeof(args));
 	args.handle = bo->handle;
 	do {
-		ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_WAIT_IDLE,
+		ret = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_GEM_WAIT_IDLE,
 					&args, sizeof(args));
 	} while (ret == -EBUSY);
 	return ret;
@@ -213,7 +213,7 @@ int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain
 	args.handle = bo->handle;
 	args.domain = 0;
 
-	ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_BUSY,
+	ret = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_GEM_BUSY,
 			&args, sizeof(args));
 
 	*domain = args.domain;
@@ -229,7 +229,7 @@ int radeon_bo_get_tiling_flags(struct radeon *radeon,
 	int ret;
 
 	args.handle = bo->handle;
-	ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_GET_TILING,
+	ret = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_GEM_GET_TILING,
 				  &args, sizeof(args));
 	if (ret)
 		return ret;
@@ -247,7 +247,7 @@ int radeon_bo_get_name(struct radeon *radeon,
 	int ret;
 
 	flink.handle = bo->handle;
-	ret = drmIoctl(radeon->fd, DRM_IOCTL_GEM_FLINK, &flink);
+	ret = drmIoctl(radeon->info.fd, DRM_IOCTL_GEM_FLINK, &flink);
 	if (ret)
 		return ret;
 
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index faeb66c8908..3be6e34f6f0 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -41,12 +41,22 @@
 #include <xf86drm.h>
 #include <stdio.h>
 
+#ifndef RADEON_INFO_TILING_CONFIG
+#define RADEON_INFO_TILING_CONFIG 6
+#endif
+
 #ifndef RADEON_INFO_WANT_HYPERZ
 #define RADEON_INFO_WANT_HYPERZ 7
 #endif
+
 #ifndef RADEON_INFO_WANT_CMASK
 #define RADEON_INFO_WANT_CMASK 8
 #endif
+
+#ifndef RADEON_INFO_CLOCK_CRYSTAL_FREQ
+#define RADEON_INFO_CLOCK_CRYSTAL_FREQ 9
+#endif
+
 #ifndef RADEON_INFO_NUM_BACKENDS
 #define RADEON_INFO_NUM_BACKENDS 10
 #endif
@@ -107,7 +117,7 @@ static boolean radeon_set_fd_access(struct radeon_drm_cs *applier,
 }
 
 static boolean radeon_get_drm_value(int fd, unsigned request,
-                                    const char *name, uint32_t *out)
+                                    const char *errname, uint32_t *out)
 {
     struct drm_radeon_info info = {0};
     int retval;
@@ -116,9 +126,9 @@ static boolean radeon_get_drm_value(int fd, unsigned request,
     info.request = request;
 
     retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info));
-    if (retval) {
-        fprintf(stderr, "%s: Failed to get %s, error number %d\n",
-                __func__, name, retval);
+    if (retval && errname) {
+        fprintf(stderr, "radeon: Failed to get %s, error number %d\n",
+                errname, retval);
         return FALSE;
     }
     return TRUE;
@@ -196,8 +206,8 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
     retval = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_INFO,
             &gem_info, sizeof(gem_info));
     if (retval) {
-        fprintf(stderr, "%s: Failed to get MM info, error number %d\n",
-                __FUNCTION__, retval);
+        fprintf(stderr, "radeon: Failed to get MM info, error number %d\n",
+                retval);
         return FALSE;
     }
     ws->info.gart_size = gem_info.gart_size;
@@ -218,10 +228,18 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
             return FALSE;
     }
     else if (ws->gen == R600) {
-        if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS,
+        if (ws->info.drm_minor >= 9 &&
+            !radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS,
                                   "num backends",
                                   &ws->info.r600_num_backends))
             return FALSE;
+
+        /* get the GPU counter frequency, failure is not fatal */
+        radeon_get_drm_value(ws->fd, RADEON_INFO_CLOCK_CRYSTAL_FREQ, NULL,
+                             &ws->info.r600_clock_crystal_freq);
+
+        radeon_get_drm_value(ws->fd, RADEON_INFO_TILING_CONFIG, NULL,
+                             &ws->info.r600_tiling_config);
     }
 
     return TRUE;
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index 2a49e615981..2948ea78c18 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -83,6 +83,8 @@ struct radeon_info {
     uint32_t r300_num_z_pipes;
 
     uint32_t r600_num_backends;
+    uint32_t r600_clock_crystal_freq;
+    uint32_t r600_tiling_config;
 };
 
 enum radeon_feature_id {

From 354f76f386afd980e8c1564c0b0502f9768007b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Fri, 22 Jul 2011 21:38:56 +0200
Subject: [PATCH 309/600] r600g: cleanup includes in winsys

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/r600/r600.h               |  8 ++----
 src/gallium/drivers/r600/r600_asm.h           |  2 --
 .../winsys/r600/drm/evergreen_hw_context.c    | 17 ++++--------
 src/gallium/winsys/r600/drm/r600_bo.c         |  6 +----
 src/gallium/winsys/r600/drm/r600_bomgr.c      |  6 ++---
 src/gallium/winsys/r600/drm/r600_drm.c        | 27 +++----------------
 src/gallium/winsys/r600/drm/r600_hw_context.c | 15 +++--------
 src/gallium/winsys/r600/drm/r600_priv.h       | 10 ++-----
 src/gallium/winsys/r600/drm/radeon_bo.c       | 10 +++----
 src/gallium/winsys/r600/drm/radeon_pciid.c    |  1 -
 10 files changed, 24 insertions(+), 78 deletions(-)

diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 2ac5ed465c1..714af0c112d 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -26,12 +26,8 @@
 #ifndef R600_H
 #define R600_H
 
-#include <assert.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <util/u_double_list.h>
-#include <util/u_inlines.h>
-#include <pipe/p_compiler.h>
+#include "util/u_double_list.h"
+#include "util/u_inlines.h"
 
 #define RADEON_CTX_MAX_PM4	(64 * 1024 / 4)
 
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index cbdaacf7178..5dec95acf1d 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -23,8 +23,6 @@
 #ifndef R600_ASM_H
 #define R600_ASM_H
 
-#include "util/u_double_list.h"
-
 struct r600_vertex_element;
 struct r600_pipe_context;
 
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
index 7fe2050cd84..412533e44bc 100644
--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -23,19 +23,13 @@
  * Authors:
  *      Jerome Glisse
  */
-#include <errno.h>
-#include <stdint.h>
-#include <string.h>
-#include <stdlib.h>
-#include <assert.h>
-#include "xf86drm.h"
 #include "r600.h"
-#include "evergreend.h"
-#include "radeon_drm.h"
-#include "pipe/p_compiler.h"
-#include "util/u_inlines.h"
-#include "util/u_memory.h"
 #include "r600_priv.h"
+#include "evergreend.h"
+#include "util/u_memory.h"
+#include "radeon_drm.h"
+#include "xf86drm.h"
+#include <errno.h>
 
 #define GROUP_FORCE_NEW_BLOCK	0
 
@@ -1271,4 +1265,3 @@ void evergreen_context_flush_dest_caches(struct r600_context *ctx)
 
 	ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY;
 }
-
diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c
index 0f5b063cf5a..f6e15630d71 100644
--- a/src/gallium/winsys/r600/drm/r600_bo.c
+++ b/src/gallium/winsys/r600/drm/r600_bo.c
@@ -23,13 +23,9 @@
  * Authors:
  *      Dave Airlie
  */
-#include <pipe/p_compiler.h>
-#include <pipe/p_screen.h>
-#include <pipebuffer/pb_bufmgr.h>
-#include "state_tracker/drm_driver.h"
 #include "r600_priv.h"
 #include "r600d.h"
-#include "drm.h"
+#include "state_tracker/drm_driver.h"
 #include "radeon_drm.h"
 
 struct r600_bo *r600_bo(struct radeon *radeon,
diff --git a/src/gallium/winsys/r600/drm/r600_bomgr.c b/src/gallium/winsys/r600/drm/r600_bomgr.c
index 4918d5eb0b1..5cea7654d97 100644
--- a/src/gallium/winsys/r600/drm/r600_bomgr.c
+++ b/src/gallium/winsys/r600/drm/r600_bomgr.c
@@ -26,11 +26,9 @@
  *      Thomas Hellström <thomas-at-vmware-dot-com>
  *      Jerome Glisse <jglisse@redhat.com>
  */
-#include <util/u_memory.h>
-#include <util/u_double_list.h>
-#include <util/u_time.h>
-#include <pipebuffer/pb_bufmgr.h>
 #include "r600_priv.h"
+#include "util/u_memory.h"
+#include "util/u_time.h"
 
 static void r600_bomgr_timeout_flush(struct r600_bomgr *mgr)
 {
diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c
index ab15257efb2..3a2186c6f4a 100644
--- a/src/gallium/winsys/r600/drm/r600_drm.c
+++ b/src/gallium/winsys/r600/drm/r600_drm.c
@@ -25,31 +25,12 @@
  *      Corbin Simpson <MostAwesomeDude@gmail.com>
  *      Joakim Sindholt <opensource@zhasha.com>
  */
-#include <stdio.h>
-#include <errno.h>
-#include <sys/ioctl.h>
-#include "util/u_inlines.h"
-#include "util/u_debug.h"
-#include "util/u_hash_table.h"
-#include <pipebuffer/pb_bufmgr.h>
-#include "r600.h"
+
 #include "r600_priv.h"
 #include "r600_drm_public.h"
-#include "xf86drm.h"
-#include "radeon_drm.h"
-#include "../../radeon/drm/radeon_winsys.h"
-
-#ifndef RADEON_INFO_TILING_CONFIG
-#define RADEON_INFO_TILING_CONFIG 0x6
-#endif
-
-#ifndef RADEON_INFO_CLOCK_CRYSTAL_FREQ
-#define RADEON_INFO_CLOCK_CRYSTAL_FREQ 0x9
-#endif
-
-#ifndef RADEON_INFO_NUM_BACKENDS
-#define RADEON_INFO_NUM_BACKENDS 0xa
-#endif
+#include <radeon_drm.h>
+#include <xf86drm.h>
+#include <errno.h>
 
 #ifndef RADEON_INFO_NUM_TILE_PIPES
 #define RADEON_INFO_NUM_TILE_PIPES 0xb
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index 46ca4ed907a..60de24fa67f 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -23,19 +23,12 @@
  * Authors:
  *      Jerome Glisse
  */
-#include <errno.h>
-#include <stdint.h>
-#include <string.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <pipe/p_compiler.h>
-#include <util/u_inlines.h>
-#include <util/u_memory.h>
-#include <pipebuffer/pb_bufmgr.h>
-#include "xf86drm.h"
-#include "radeon_drm.h"
 #include "r600_priv.h"
 #include "r600d.h"
+#include "util/u_memory.h"
+#include "radeon_drm.h"
+#include "xf86drm.h"
+#include <errno.h>
 
 #define GROUP_FORCE_NEW_BLOCK	0
 
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 9fc7c534646..08a3552803c 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -26,16 +26,10 @@
 #ifndef R600_PRIV_H
 #define R600_PRIV_H
 
-#include <errno.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <util/u_double_list.h>
-#include <util/u_inlines.h>
-#include "util/u_hash_table.h"
-#include <os/os_thread.h>
 #include "r600.h"
 #include "../../radeon/drm/radeon_winsys.h"
+#include "util/u_hash_table.h"
+#include "os/os_thread.h"
 
 #define PKT_COUNT_C                     0xC000FFFF
 #define PKT_COUNT_S(x)                  (((x) & 0x3FFF) << 16)
diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c
index 34696da515b..80336de4537 100644
--- a/src/gallium/winsys/r600/drm/radeon_bo.c
+++ b/src/gallium/winsys/r600/drm/radeon_bo.c
@@ -24,14 +24,12 @@
  *      Jerome Glisse
  */
 #define _FILE_OFFSET_BITS 64
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+#include "r600_priv.h"
+#include "util/u_hash_table.h"
+#include "radeon_drm.h"
+#include "xf86drm.h"
 #include <sys/mman.h>
 #include <errno.h>
-#include "r600_priv.h"
-#include "xf86drm.h"
-#include "radeon_drm.h"
 
 int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo)
 {
diff --git a/src/gallium/winsys/r600/drm/radeon_pciid.c b/src/gallium/winsys/r600/drm/radeon_pciid.c
index f54a7c8fe72..87572417c80 100644
--- a/src/gallium/winsys/r600/drm/radeon_pciid.c
+++ b/src/gallium/winsys/r600/drm/radeon_pciid.c
@@ -23,7 +23,6 @@
  * Authors:
  *      Jerome Glisse
  */
-#include <stdlib.h>
 #include "r600_priv.h"
 
 struct pci_id {

From 2ce783d8ddec1b1fcadc0798af0ebb045bba1cc4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Tue, 2 Aug 2011 20:25:13 +0200
Subject: [PATCH 310/600] r600g: put radeon_winsys in screen::winsys, don't
 include drm_driver in the pipe

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/r600/r600.h               |  7 +++---
 src/gallium/drivers/r600/r600_buffer.c        | 23 ++++++++++---------
 src/gallium/drivers/r600/r600_pipe.c          | 10 ++++----
 src/gallium/drivers/r600/r600_pipe.h          |  1 +
 src/gallium/drivers/r600/r600_public.h        |  2 +-
 src/gallium/drivers/r600/r600_texture.c       | 22 ++++++++----------
 .../winsys/r600/drm/evergreen_hw_context.c    |  1 +
 src/gallium/winsys/r600/drm/r600_bo.c         | 11 +++++----
 src/gallium/winsys/r600/drm/r600_drm.c        | 18 +++++++--------
 src/gallium/winsys/r600/drm/r600_drm_public.h |  2 +-
 src/gallium/winsys/r600/drm/r600_hw_context.c |  1 +
 src/gallium/winsys/r600/drm/r600_priv.h       |  1 +
 12 files changed, 54 insertions(+), 45 deletions(-)

diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 714af0c112d..7e704730122 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -95,11 +95,12 @@ unsigned r600_get_backend_map(struct radeon *radeon);
 
 /* r600_bo.c */
 struct r600_bo;
+
 struct r600_bo *r600_bo(struct radeon *radeon,
 			unsigned size, unsigned alignment,
 			unsigned binding, unsigned usage);
-struct r600_bo *r600_bo_handle(struct radeon *radeon,
-				unsigned handle, unsigned *array_mode);
+struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whandle,
+				unsigned *stride, unsigned *array_mode);
 void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx);
 void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo);
 boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *pb_bo,
@@ -315,7 +316,7 @@ void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, stru
 void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
 void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
 
-struct radeon *radeon_decref(struct radeon *radeon);
+struct radeon *radeon_destroy(struct radeon *radeon);
 
 void _r600_pipe_state_add_reg(struct r600_context *ctx,
 			      struct r600_pipe_state *state,
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index 72f352df3c3..28d8c6af1cb 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -33,8 +33,6 @@
 #include <util/u_memory.h>
 #include "util/u_upload_mgr.h"
 
-#include "state_tracker/drm_driver.h"
-
 #include <xf86drm.h>
 #include "radeon_drm.h"
 
@@ -48,7 +46,7 @@ static void r600_buffer_destroy(struct pipe_screen *screen,
 	struct r600_resource_buffer *rbuffer = r600_buffer(buf);
 
 	if (rbuffer->r.bo) {
-		r600_bo_reference((struct radeon*)screen->winsys, &rbuffer->r.bo, NULL);
+		r600_bo_reference(rscreen->radeon, &rbuffer->r.bo, NULL);
 	}
 	rbuffer->r.bo = NULL;
 	util_slab_free(&rscreen->pool_buffers, rbuffer);
@@ -81,12 +79,13 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe,
 				      struct pipe_transfer *transfer)
 {
 	struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource);
+	struct r600_pipe_context *rctx = (struct r600_pipe_context*)pipe;
 	uint8_t *data;
 
 	if (rbuffer->r.b.user_ptr)
 		return (uint8_t*)rbuffer->r.b.user_ptr + transfer->box.x;
 
-	data = r600_bo_map((struct radeon*)pipe->winsys, rbuffer->r.bo, transfer->usage, pipe);
+	data = r600_bo_map(rctx->screen->radeon, rbuffer->r.bo, transfer->usage, pipe);
 	if (!data)
 		return NULL;
 
@@ -97,12 +96,13 @@ static void r600_buffer_transfer_unmap(struct pipe_context *pipe,
 					struct pipe_transfer *transfer)
 {
 	struct r600_resource_buffer *rbuffer = r600_buffer(transfer->resource);
+	struct r600_pipe_context *rctx = (struct r600_pipe_context*)pipe;
 
 	if (rbuffer->r.b.user_ptr)
 		return;
 
 	if (rbuffer->r.bo)
-		r600_bo_unmap((struct radeon*)pipe->winsys, rbuffer->r.bo);
+		r600_bo_unmap(rctx->screen->radeon, rbuffer->r.bo);
 }
 
 static void r600_buffer_transfer_flush_region(struct pipe_context *pipe,
@@ -127,20 +127,21 @@ static void r600_buffer_transfer_inline_write(struct pipe_context *pipe,
 						unsigned stride,
 						unsigned layer_stride)
 {
-	struct radeon *ws = (struct radeon*)pipe->winsys;
+	struct r600_pipe_context *rctx = (struct r600_pipe_context*)pipe;
+	struct radeon *radeon = rctx->screen->radeon;
 	struct r600_resource_buffer *rbuffer = r600_buffer(resource);
 	uint8_t *map = NULL;
 
 	assert(rbuffer->r.b.user_ptr == NULL);
 
-	map = r600_bo_map(ws, rbuffer->r.bo,
+	map = r600_bo_map(radeon, rbuffer->r.bo,
 			  PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage,
 			  pipe);
 
 	memcpy(map + box->x, data, box->width);
 
 	if (rbuffer->r.bo)
-		r600_bo_unmap(ws, rbuffer->r.bo);
+		r600_bo_unmap(radeon, rbuffer->r.bo);
 }
 
 static const struct u_resource_vtbl r600_buffer_vtbl =
@@ -175,7 +176,7 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
 	rbuffer->r.size = rbuffer->r.b.b.b.width0;
 	rbuffer->r.bo_size = rbuffer->r.size;
 
-	bo = r600_bo((struct radeon*)screen->winsys,
+	bo = r600_bo(rscreen->radeon,
 		     rbuffer->r.b.b.b.width0,
 		     alignment, rbuffer->r.b.b.b.bind,
 		     rbuffer->r.b.b.b.usage);
@@ -219,11 +220,11 @@ struct pipe_resource *r600_user_buffer_create(struct pipe_screen *screen,
 struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen,
 					      struct winsys_handle *whandle)
 {
-	struct radeon *rw = (struct radeon*)screen->winsys;
+	struct radeon *rw = ((struct r600_screen*)screen)->radeon;
 	struct r600_resource *rbuffer;
 	struct r600_bo *bo = NULL;
 
-	bo = r600_bo_handle(rw, whandle->handle, NULL);
+	bo = r600_bo_handle(rw, whandle, NULL, NULL);
 	if (bo == NULL) {
 		return NULL;
 	}
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 6181e8b3202..8a18207d1ea 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -500,7 +500,8 @@ static void r600_destroy_screen(struct pipe_screen* pscreen)
 	if (rscreen == NULL)
 		return;
 
-	radeon_decref(rscreen->radeon);
+	radeon_destroy(rscreen->radeon);
+	rscreen->ws->destroy(rscreen->ws);
 
 	util_slab_destroy(&rscreen->pool_buffers);
 	pipe_mutex_destroy(rscreen->mutex_num_contexts);
@@ -564,18 +565,19 @@ static boolean r600_fence_finish(struct pipe_screen *pscreen,
 	return TRUE;
 }
 
-struct pipe_screen *r600_screen_create(struct radeon_winsys *rw)
+struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
 {
 	struct r600_screen *rscreen;
-	struct radeon *radeon = r600_drm_winsys_create(rw);
+	struct radeon *radeon = radeon_create(ws);
 
 	rscreen = CALLOC_STRUCT(r600_screen);
 	if (rscreen == NULL) {
 		return NULL;
 	}
 
+	rscreen->ws = ws;
 	rscreen->radeon = radeon;
-	rscreen->screen.winsys = (struct pipe_winsys*)radeon;
+	rscreen->screen.winsys = (struct pipe_winsys*)ws;
 	rscreen->screen.destroy = r600_destroy_screen;
 	rscreen->screen.get_name = r600_get_name;
 	rscreen->screen.get_vendor = r600_get_vendor;
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index c53a191594b..3ca003aa244 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -74,6 +74,7 @@ enum r600_pipe_state_id {
 
 struct r600_screen {
 	struct pipe_screen		screen;
+	struct radeon_winsys		*ws;
 	struct radeon			*radeon;
 	struct r600_tiling_info		*tiling_info;
 	struct util_slab_mempool	pool_buffers;
diff --git a/src/gallium/drivers/r600/r600_public.h b/src/gallium/drivers/r600/r600_public.h
index 1c82a7af68f..e4fe23a87b7 100644
--- a/src/gallium/drivers/r600/r600_public.h
+++ b/src/gallium/drivers/r600/r600_public.h
@@ -25,6 +25,6 @@
 
 struct radeon_winsys;
 
-struct pipe_screen *r600_screen_create(struct radeon_winsys *rw);
+struct pipe_screen *r600_screen_create(struct radeon_winsys *ws);
 
 #endif
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 927eb5dafc9..386d8f35015 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -31,7 +31,6 @@
 #include <util/u_math.h>
 #include <util/u_inlines.h>
 #include <util/u_memory.h>
-#include "state_tracker/drm_driver.h"
 #include "pipebuffer/pb_buffer.h"
 #include "r600_pipe.h"
 #include "r600_resource.h"
@@ -239,7 +238,7 @@ static void r600_setup_miptree(struct pipe_screen *screen,
 			       unsigned array_mode)
 {
 	struct pipe_resource *ptex = &rtex->resource.b.b.b;
-	struct radeon *radeon = (struct radeon *)screen->winsys;
+	struct radeon *radeon = ((struct r600_screen*)screen)->radeon;
 	enum chip_class chipc = r600_get_family_class(radeon);
 	unsigned size, layer_size, i, offset;
 	unsigned nblocksx, nblocksy, extra_size = 0;
@@ -329,7 +328,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
 {
 	struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex;
 	struct r600_resource *resource = &rtex->resource;
-	struct radeon *radeon = (struct radeon *)screen->winsys;
+	struct radeon *radeon = ((struct r600_screen*)screen)->radeon;
 
 	return r600_bo_get_winsys_handle(radeon, resource->bo,
 			rtex->pitch_in_bytes[0], whandle);
@@ -340,7 +339,7 @@ static void r600_texture_destroy(struct pipe_screen *screen,
 {
 	struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex;
 	struct r600_resource *resource = &rtex->resource;
-	struct radeon *radeon = (struct radeon *)screen->winsys;
+	struct radeon *radeon = ((struct r600_screen*)screen)->radeon;
 
 	if (rtex->flushed_depth_texture)
 		pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL);
@@ -373,7 +372,7 @@ r600_texture_create_object(struct pipe_screen *screen,
 {
 	struct r600_resource_texture *rtex;
 	struct r600_resource *resource;
-	struct radeon *radeon = (struct radeon *)screen->winsys;
+	struct radeon *radeon = ((struct r600_screen*)screen)->radeon;
 
 	rtex = CALLOC_STRUCT(r600_resource_texture);
 	if (rtex == NULL)
@@ -483,8 +482,9 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
 					       const struct pipe_resource *templ,
 					       struct winsys_handle *whandle)
 {
-	struct radeon *rw = (struct radeon*)screen->winsys;
+	struct radeon *rw = ((struct r600_screen*)screen)->radeon;
 	struct r600_bo *bo = NULL;
+	unsigned stride = 0;
 	unsigned array_mode = 0;
 
 	/* Support only 2D textures without mipmaps */
@@ -492,15 +492,13 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
 	      templ->depth0 != 1 || templ->last_level != 0)
 		return NULL;
 
-	bo = r600_bo_handle(rw, whandle->handle, &array_mode);
+	bo = r600_bo_handle(rw, whandle, &stride, &array_mode);
 	if (bo == NULL) {
 		return NULL;
 	}
 
 	return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode,
-								  whandle->stride,
-								  0,
-								  bo);
+								  stride, 0, bo);
 }
 
 int r600_texture_depth_flush(struct pipe_context *ctx,
@@ -687,7 +685,7 @@ void* r600_texture_transfer_map(struct pipe_context *ctx,
 	struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
 	struct r600_bo *bo;
 	enum pipe_format format = transfer->resource->format;
-	struct radeon *radeon = (struct radeon *)ctx->screen->winsys;
+	struct radeon *radeon = ((struct r600_screen*)ctx->screen)->radeon;
 	unsigned offset = 0;
 	char *map;
 
@@ -717,7 +715,7 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx,
 				 struct pipe_transfer* transfer)
 {
 	struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
-	struct radeon *radeon = (struct radeon *)ctx->screen->winsys;
+	struct radeon *radeon = ((struct r600_screen*)ctx->screen)->radeon;
 	struct r600_bo *bo;
 
 	if (rtransfer->staging_texture) {
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
index 412533e44bc..94206d5568b 100644
--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -902,6 +902,7 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon)
 
 	memset(ctx, 0, sizeof(struct r600_context));
 	ctx->radeon = radeon;
+
 	LIST_INITHEAD(&ctx->query_list);
 
 	/* init dirty list */
diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c
index f6e15630d71..bbd52a1fdde 100644
--- a/src/gallium/winsys/r600/drm/r600_bo.c
+++ b/src/gallium/winsys/r600/drm/r600_bo.c
@@ -86,23 +86,26 @@ struct r600_bo *r600_bo(struct radeon *radeon,
 	return bo;
 }
 
-struct r600_bo *r600_bo_handle(struct radeon *radeon,
-			       unsigned handle, unsigned *array_mode)
+struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whandle,
+			       unsigned *stride, unsigned *array_mode)
 {
 	struct r600_bo *bo = calloc(1, sizeof(struct r600_bo));
 	struct radeon_bo *rbo;
 
-	rbo = bo->bo = radeon_bo(radeon, handle, 0, 0, 0);
+	rbo = bo->bo = radeon_bo(radeon, whandle->handle, 0, 0, 0);
 	if (rbo == NULL) {
 		free(bo);
 		return NULL;
 	}
+
+	pipe_reference_init(&bo->reference, 1);
 	bo->size = rbo->size;
 	bo->domains = (RADEON_GEM_DOMAIN_CPU |
 			RADEON_GEM_DOMAIN_GTT |
 			RADEON_GEM_DOMAIN_VRAM);
 
-	pipe_reference_init(&bo->reference, 1);
+	if (stride)
+		*stride = whandle->stride;
 
 	radeon_bo_get_tiling_flags(radeon, rbo, &bo->tiling_flags, &bo->kernel_pitch);
 	if (array_mode) {
diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c
index 3a2186c6f4a..1cf905f2575 100644
--- a/src/gallium/winsys/r600/drm/r600_drm.c
+++ b/src/gallium/winsys/r600/drm/r600_drm.c
@@ -28,6 +28,7 @@
 
 #include "r600_priv.h"
 #include "r600_drm_public.h"
+#include "util/u_memory.h"
 #include <radeon_drm.h>
 #include <xf86drm.h>
 #include <errno.h>
@@ -244,22 +245,21 @@ static int handle_compare(void *key1, void *key2)
     return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
 }
 
-struct radeon *r600_drm_winsys_create(struct radeon_winsys *rw)
+struct radeon *radeon_create(struct radeon_winsys *ws)
 {
-	struct radeon *radeon;
 	int r;
-
-	radeon = calloc(1, sizeof(*radeon));
+	struct radeon *radeon = CALLOC_STRUCT(radeon);
 	if (radeon == NULL) {
 		return NULL;
 	}
 
-	rw->query_info(rw, &radeon->info);
+	radeon->ws = ws;
+	ws->query_info(ws, &radeon->info);
 
 	radeon->family = radeon_family_from_device(radeon->info.pci_id);
 	if (radeon->family == CHIP_UNKNOWN) {
 		fprintf(stderr, "Unknown chipset 0x%04X\n", radeon->info.pci_id);
-		return radeon_decref(radeon);
+		return radeon_destroy(radeon);
 	}
 	/* setup class */
 	switch (radeon->family) {
@@ -323,7 +323,7 @@ struct radeon *r600_drm_winsys_create(struct radeon_winsys *rw)
 	}
 	r = radeon_init_fence(radeon);
 	if (r) {
-		radeon_decref(radeon);
+		radeon_destroy(radeon);
 		return NULL;
 	}
 
@@ -332,7 +332,7 @@ struct radeon *r600_drm_winsys_create(struct radeon_winsys *rw)
 	return radeon;
 }
 
-struct radeon *radeon_decref(struct radeon *radeon)
+struct radeon *radeon_destroy(struct radeon *radeon)
 {
 	if (radeon == NULL)
 		return NULL;
@@ -346,6 +346,6 @@ struct radeon *radeon_decref(struct radeon *radeon)
 	if (radeon->bomgr)
 		r600_bomgr_destroy(radeon->bomgr);
 
-	free(radeon);
+	FREE(radeon);
 	return NULL;
 }
diff --git a/src/gallium/winsys/r600/drm/r600_drm_public.h b/src/gallium/winsys/r600/drm/r600_drm_public.h
index 1d990f91013..b8a37c7574f 100644
--- a/src/gallium/winsys/r600/drm/r600_drm_public.h
+++ b/src/gallium/winsys/r600/drm/r600_drm_public.h
@@ -28,6 +28,6 @@
 
 struct radeon_winsys;
 
-struct radeon *r600_drm_winsys_create(struct radeon_winsys *rw);
+struct radeon *radeon_create(struct radeon_winsys *ws);
 
 #endif
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index 60de24fa67f..8ab4d94a6ee 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -874,6 +874,7 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon)
 
 	memset(ctx, 0, sizeof(struct r600_context));
 	ctx->radeon = radeon;
+
 	LIST_INITHEAD(&ctx->query_list);
 
 	/* init dirty list */
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 08a3552803c..8296aa1bdd1 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -38,6 +38,7 @@ struct r600_bomgr;
 struct r600_bo;
 
 struct radeon {
+	struct radeon_winsys		*ws;
 	struct radeon_info		info;
 	unsigned			family;
 	enum chip_class			chip_class;

From c092e236cc049cc56e9d0d337defc96729cf5830 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Tue, 2 Aug 2011 21:18:10 +0200
Subject: [PATCH 311/600] r600g: remove unused r600_bo::kernel_pitch

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/winsys/r600/drm/r600_bo.c   | 2 +-
 src/gallium/winsys/r600/drm/r600_priv.h | 4 +---
 src/gallium/winsys/r600/drm/radeon_bo.c | 4 +---
 3 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c
index bbd52a1fdde..f74e0a498cd 100644
--- a/src/gallium/winsys/r600/drm/r600_bo.c
+++ b/src/gallium/winsys/r600/drm/r600_bo.c
@@ -107,7 +107,7 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan
 	if (stride)
 		*stride = whandle->stride;
 
-	radeon_bo_get_tiling_flags(radeon, rbo, &bo->tiling_flags, &bo->kernel_pitch);
+	radeon_bo_get_tiling_flags(radeon, rbo, &bo->tiling_flags);
 	if (array_mode) {
 		if (bo->tiling_flags) {
 			if (bo->tiling_flags & RADEON_TILING_MACRO)
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 8296aa1bdd1..50783a61069 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -96,7 +96,6 @@ struct r600_bo {
 	/* DO NOT MOVE THIS ^ */
 	unsigned			size;
 	unsigned			tiling_flags;
-	unsigned			kernel_pitch;
 	unsigned			domains;
 	struct radeon_bo		*bo;
 	unsigned			fence;
@@ -140,8 +139,7 @@ int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain
 int radeon_bo_fencelist(struct radeon *radeon, struct radeon_bo **bolist, uint32_t num_bo);
 int radeon_bo_get_tiling_flags(struct radeon *radeon,
 			       struct radeon_bo *bo,
-			       uint32_t *tiling_flags,
-			       uint32_t *pitch);
+			       uint32_t *tiling_flags);
 int radeon_bo_get_name(struct radeon *radeon,
 		       struct radeon_bo *bo,
 		       uint32_t *name);
diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c
index 80336de4537..4e61c942a26 100644
--- a/src/gallium/winsys/r600/drm/radeon_bo.c
+++ b/src/gallium/winsys/r600/drm/radeon_bo.c
@@ -220,8 +220,7 @@ int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain
 
 int radeon_bo_get_tiling_flags(struct radeon *radeon,
 			       struct radeon_bo *bo,
-			       uint32_t *tiling_flags,
-			       uint32_t *pitch)
+			       uint32_t *tiling_flags)
 {
 	struct drm_radeon_gem_get_tiling args = {};
 	int ret;
@@ -233,7 +232,6 @@ int radeon_bo_get_tiling_flags(struct radeon *radeon,
 		return ret;
 
 	*tiling_flags = args.tiling_flags;
-	*pitch = args.pitch;
 	return ret;
 }
 

From 39db886548b9e93a6a91bf76095848af44972e43 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Tue, 2 Aug 2011 23:03:11 +0200
Subject: [PATCH 312/600] r600g: remove unused r600_bo::tiling_flags

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/winsys/r600/drm/r600_bo.c   | 9 +++++----
 src/gallium/winsys/r600/drm/r600_priv.h | 1 -
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c
index f74e0a498cd..9e6c7cc7639 100644
--- a/src/gallium/winsys/r600/drm/r600_bo.c
+++ b/src/gallium/winsys/r600/drm/r600_bo.c
@@ -91,6 +91,7 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan
 {
 	struct r600_bo *bo = calloc(1, sizeof(struct r600_bo));
 	struct radeon_bo *rbo;
+	unsigned tiling_flags;
 
 	rbo = bo->bo = radeon_bo(radeon, whandle->handle, 0, 0, 0);
 	if (rbo == NULL) {
@@ -107,12 +108,12 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan
 	if (stride)
 		*stride = whandle->stride;
 
-	radeon_bo_get_tiling_flags(radeon, rbo, &bo->tiling_flags);
+	radeon_bo_get_tiling_flags(radeon, rbo, &tiling_flags);
 	if (array_mode) {
-		if (bo->tiling_flags) {
-			if (bo->tiling_flags & RADEON_TILING_MACRO)
+		if (tiling_flags) {
+			if (tiling_flags & RADEON_TILING_MACRO)
 				*array_mode = V_0280A0_ARRAY_2D_TILED_THIN1;
-			else if (bo->tiling_flags & RADEON_TILING_MICRO)
+			else if (tiling_flags & RADEON_TILING_MICRO)
 				*array_mode = V_0280A0_ARRAY_1D_TILED_THIN1;
 		} else {
 			*array_mode = 0;
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 50783a61069..c310defe2b1 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -95,7 +95,6 @@ struct r600_bo {
 	struct pipe_reference		reference; /* this must be the first member for the r600_bo_reference inline to work */
 	/* DO NOT MOVE THIS ^ */
 	unsigned			size;
-	unsigned			tiling_flags;
 	unsigned			domains;
 	struct radeon_bo		*bo;
 	unsigned			fence;

From edca57e53318121a7bd909c0c36dcffe5d9ac15f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Tue, 2 Aug 2011 23:58:57 +0200
Subject: [PATCH 313/600] r600g: remove unused function declarations

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/winsys/r600/drm/r600_priv.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index c310defe2b1..ee3df9c93f9 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -115,12 +115,6 @@ struct r600_bomgr {
 	unsigned			num_delayed;
 };
 
-/*
- * r600_drm.c
- */
-struct radeon *r600_new(int fd, unsigned device);
-void r600_delete(struct radeon *r600);
-
 /*
  * radeon_pciid.c
  */

From 11daa7e325cc2653269d3c01844ff77fdd515de9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Wed, 3 Aug 2011 01:03:13 +0200
Subject: [PATCH 314/600] r600g: allocate/destroy buffers using radeon_winsys

We use the cache buffer manager from radeon_winsys now, but we don't use
anything else yet.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/winsys/r600/drm/r600_bo.c         |  4 +-
 src/gallium/winsys/r600/drm/r600_drm.c        | 16 ----
 src/gallium/winsys/r600/drm/r600_priv.h       | 10 +--
 src/gallium/winsys/r600/drm/radeon_bo.c       | 77 +++++--------------
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c |  7 ++
 src/gallium/winsys/radeon/drm/radeon_winsys.h |  4 +
 6 files changed, 35 insertions(+), 83 deletions(-)

diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c
index 9e6c7cc7639..8cf1d809235 100644
--- a/src/gallium/winsys/r600/drm/r600_bo.c
+++ b/src/gallium/winsys/r600/drm/r600_bo.c
@@ -68,7 +68,7 @@ struct r600_bo *r600_bo(struct radeon *radeon,
 		initial_domain = RADEON_GEM_DOMAIN_VRAM;
 		break;
 	}
-	rbo = radeon_bo(radeon, 0, size, alignment, initial_domain);
+	rbo = radeon_bo(radeon, 0, size, alignment, binding, initial_domain);
 	if (rbo == NULL) {
 		return NULL;
 	}
@@ -93,7 +93,7 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan
 	struct radeon_bo *rbo;
 	unsigned tiling_flags;
 
-	rbo = bo->bo = radeon_bo(radeon, whandle->handle, 0, 0, 0);
+	rbo = bo->bo = radeon_bo(radeon, whandle->handle, 0, 0, 0, 0);
 	if (rbo == NULL) {
 		free(bo);
 		return NULL;
diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c
index 1cf905f2575..f2e46d3d0d9 100644
--- a/src/gallium/winsys/r600/drm/r600_drm.c
+++ b/src/gallium/winsys/r600/drm/r600_drm.c
@@ -233,18 +233,6 @@ static int radeon_init_fence(struct radeon *radeon)
 	return 0;
 }
 
-#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))
-
-static unsigned handle_hash(void *key)
-{
-    return PTR_TO_UINT(key);
-}
-
-static int handle_compare(void *key1, void *key2)
-{
-    return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
-}
-
 struct radeon *radeon_create(struct radeon_winsys *ws)
 {
 	int r;
@@ -327,8 +315,6 @@ struct radeon *radeon_create(struct radeon_winsys *ws)
 		return NULL;
 	}
 
-	radeon->bo_handles = util_hash_table_create(handle_hash, handle_compare);
-	pipe_mutex_init(radeon->bo_handles_mutex);
 	return radeon;
 }
 
@@ -337,8 +323,6 @@ struct radeon *radeon_destroy(struct radeon *radeon)
 	if (radeon == NULL)
 		return NULL;
 
-	util_hash_table_destroy(radeon->bo_handles);
-	pipe_mutex_destroy(radeon->bo_handles_mutex);
 	if (radeon->fence_bo) {
 		r600_bo_reference(radeon, &radeon->fence_bo, NULL);
 	}
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index ee3df9c93f9..621c7eb2158 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -50,10 +50,6 @@ struct radeon {
 	unsigned			num_tile_pipes;
 	unsigned			backend_map;
 	boolean				backend_map_valid;
-
-        /* List of buffer handles and its mutex. */
-	struct util_hash_table          *bo_handles;
-	pipe_mutex bo_handles_mutex;
 };
 
 /* these flags are used in register flags and added into block flags */
@@ -75,9 +71,10 @@ struct r600_reg {
 #define BO_BOUND_TEXTURE 1
 struct radeon_bo {
 	struct pipe_reference		reference;
+	struct pb_buffer		*buf;
+
 	unsigned			handle;
 	unsigned			size;
-	unsigned			alignment;
 	int				map_count;
 	void				*data;
 	struct list_head		fencedlist;
@@ -87,7 +84,6 @@ struct radeon_bo {
 	struct r600_reloc		*reloc;
 	unsigned			reloc_id;
 	unsigned			last_flush;
-	unsigned                        name;
 	unsigned                        binding;
 };
 
@@ -124,7 +120,7 @@ unsigned radeon_family_from_device(unsigned device);
  * radeon_bo.c
  */
 struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
-			    unsigned size, unsigned alignment, unsigned initial_domain);
+			    unsigned size, unsigned alignment, unsigned bind, unsigned initial_domain);
 void radeon_bo_reference(struct radeon *radeon, struct radeon_bo **dst,
 			 struct radeon_bo *src);
 int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo);
diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c
index 4e61c942a26..7ff4806c056 100644
--- a/src/gallium/winsys/r600/drm/radeon_bo.c
+++ b/src/gallium/winsys/r600/drm/radeon_bo.c
@@ -26,6 +26,7 @@
 #define _FILE_OFFSET_BITS 64
 #include "r600_priv.h"
 #include "util/u_hash_table.h"
+#include "util/u_memory.h"
 #include "radeon_drm.h"
 #include "xf86drm.h"
 #include <sys/mman.h>
@@ -68,22 +69,16 @@ static void radeon_bo_fixed_unmap(struct radeon *radeon, struct radeon_bo *bo)
 	}
 }
 
+#include "state_tracker/drm_driver.h"
+
 struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
-			    unsigned size, unsigned alignment, unsigned initial_domain)
+			    unsigned size, unsigned alignment, unsigned bind,
+			    unsigned initial_domain)
 {
 	struct radeon_bo *bo;
-	int r;
+	struct winsys_handle whandle = {};
+	whandle.handle = handle;
 
-	if (handle) {
-		pipe_mutex_lock(radeon->bo_handles_mutex);
-		bo = util_hash_table_get(radeon->bo_handles,
-					 (void *)(uintptr_t)handle);
-		if (bo) {
-			struct radeon_bo *b = NULL;
-			radeon_bo_reference(radeon, &b, bo);
-			goto done;
-		}
-	}
 	bo = calloc(1, sizeof(*bo));
 	if (bo == NULL) {
 		return NULL;
@@ -91,69 +86,35 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
 	bo->size = size;
 	bo->handle = handle;
 	pipe_reference_init(&bo->reference, 1);
-	bo->alignment = alignment;
 	LIST_INITHEAD(&bo->fencedlist);
 
 	if (handle) {
-		struct drm_gem_open open_arg;
-
-		memset(&open_arg, 0, sizeof(open_arg));
-		open_arg.name = handle;
-		r = drmIoctl(radeon->info.fd, DRM_IOCTL_GEM_OPEN, &open_arg);
-		if (r != 0) {
-			free(bo);
+		unsigned size;
+		bo->buf = radeon->ws->buffer_from_handle(radeon->ws, &whandle, NULL, &size);
+		if (!bo->buf) {
+			FREE(bo);
 			return NULL;
 		}
-		bo->name = handle;
-		bo->handle = open_arg.handle;
-		bo->size = open_arg.size;
+		bo->handle = radeon->ws->trans_get_buffer_handle(bo->buf);
+		bo->size = size;
 		bo->shared = TRUE;
 	} else {
-		struct drm_radeon_gem_create args = {};
-
-		args.size = size;
-		args.alignment = alignment;
-		args.initial_domain = initial_domain;
-		args.flags = 0;
-		args.handle = 0;
-		r = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_GEM_CREATE,
-					&args, sizeof(args));
-		bo->handle = args.handle;
-		if (r) {
-			fprintf(stderr, "Failed to allocate :\n");
-			fprintf(stderr, "   size      : %d bytes\n", size);
-			fprintf(stderr, "   alignment : %d bytes\n", alignment);
-			free(bo);
+		bo->buf = radeon->ws->buffer_create(radeon->ws, size, alignment, bind, initial_domain);
+		if (!bo->buf) {
+			FREE(bo);
 			return NULL;
 		}
+		bo->handle = radeon->ws->trans_get_buffer_handle(bo->buf);
 	}
-
-	if (handle)
-		util_hash_table_set(radeon->bo_handles, (void *)(uintptr_t)handle, bo);
-done:
-	if (handle)
-		pipe_mutex_unlock(radeon->bo_handles_mutex);
-
 	return bo;
 }
 
 static void radeon_bo_destroy(struct radeon *radeon, struct radeon_bo *bo)
 {
-	struct drm_gem_close args;
-
-	if (bo->name) {
-		pipe_mutex_lock(radeon->bo_handles_mutex);
-		util_hash_table_remove(radeon->bo_handles,
-				       (void *)(uintptr_t)bo->name);
-		pipe_mutex_unlock(radeon->bo_handles_mutex);
-	}
 	LIST_DEL(&bo->fencedlist);
 	radeon_bo_fixed_unmap(radeon, bo);
-	memset(&args, 0, sizeof(args));
-	args.handle = bo->handle;
-	drmIoctl(radeon->info.fd, DRM_IOCTL_GEM_CLOSE, &args);
-	memset(bo, 0, sizeof(struct radeon_bo));
-	free(bo);
+	pb_reference(&bo->buf, NULL);
+	FREE(bo);
 }
 
 void radeon_bo_reference(struct radeon *radeon,
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 609a9065db8..58898d3423e 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -617,6 +617,11 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
     return TRUE;
 }
 
+static unsigned trans_get_buffer_handle(struct pb_buffer *buf)
+{
+	return get_radeon_bo(buf)->handle;
+}
+
 void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
 {
     ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle;
@@ -629,4 +634,6 @@ void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
     ws->base.buffer_create = radeon_winsys_bo_create;
     ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
     ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
+
+    ws->base.trans_get_buffer_handle = trans_get_buffer_handle;
 }
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index 2948ea78c18..2e5000702e0 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -325,6 +325,10 @@ struct radeon_winsys {
     boolean (*cs_request_feature)(struct radeon_winsys_cs *cs,
                                   enum radeon_feature_id fid,
                                   boolean enable);
+
+
+    /* Transitional functions for r600g when moving to winsys/radeon */
+    unsigned (*trans_get_buffer_handle)(struct pb_buffer *buf);
 };
 
 #endif

From 60ff68ad00b821eb04ebb5e0d83620421ee16deb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Wed, 3 Aug 2011 01:34:39 +0200
Subject: [PATCH 315/600] r600g: remove the cache buffer manager from
 winsys/r600

As we've just started using the one from winsys/radeon.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/winsys/r600/drm/Makefile     |   3 +-
 src/gallium/winsys/r600/drm/SConscript   |   1 -
 src/gallium/winsys/r600/drm/r600_bo.c    |  22 +---
 src/gallium/winsys/r600/drm/r600_bomgr.c | 159 -----------------------
 src/gallium/winsys/r600/drm/r600_drm.c   |   7 -
 src/gallium/winsys/r600/drm/r600_priv.h  |  34 -----
 6 files changed, 3 insertions(+), 223 deletions(-)
 delete mode 100644 src/gallium/winsys/r600/drm/r600_bomgr.c

diff --git a/src/gallium/winsys/r600/drm/Makefile b/src/gallium/winsys/r600/drm/Makefile
index 1d0de31c65a..e5b58d6cf87 100644
--- a/src/gallium/winsys/r600/drm/Makefile
+++ b/src/gallium/winsys/r600/drm/Makefile
@@ -10,8 +10,7 @@ C_SOURCES = \
 	radeon_pciid.c \
 	r600_bo.c \
 	r600_drm.c \
-	r600_hw_context.c \
-	r600_bomgr.c
+	r600_hw_context.c
 
 LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/r600 \
 		   -I$(TOP)/include \
diff --git a/src/gallium/winsys/r600/drm/SConscript b/src/gallium/winsys/r600/drm/SConscript
index efcedc6bff9..3665b6eaeef 100644
--- a/src/gallium/winsys/r600/drm/SConscript
+++ b/src/gallium/winsys/r600/drm/SConscript
@@ -9,7 +9,6 @@ r600_sources = [
     'r600_bo.c',
     'r600_drm.c',
     'r600_hw_context.c',
-    'r600_bomgr.c',
 ]
 
 env.PkgUseModules('DRM_RADEON')
diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c
index 8cf1d809235..4918897be67 100644
--- a/src/gallium/winsys/r600/drm/r600_bo.c
+++ b/src/gallium/winsys/r600/drm/r600_bo.c
@@ -47,14 +47,6 @@ struct r600_bo *r600_bo(struct radeon *radeon,
 				RADEON_GEM_DOMAIN_GTT |
 				RADEON_GEM_DOMAIN_VRAM);
 
-	if (binding & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) {
-		bo = r600_bomgr_bo_create(radeon->bomgr, size, alignment, *radeon->cfence);
-		if (bo) {
-			bo->domains = domains;
-			return bo;
-		}
-	}
-
 	switch(usage) {
 	case PIPE_USAGE_DYNAMIC:
 	case PIPE_USAGE_STREAM:
@@ -75,12 +67,8 @@ struct r600_bo *r600_bo(struct radeon *radeon,
 
 	bo = calloc(1, sizeof(struct r600_bo));
 	bo->size = size;
-	bo->alignment = alignment;
 	bo->domains = domains;
 	bo->bo = rbo;
-	if (binding & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) {
-		r600_bomgr_bo_init(radeon->bomgr, bo);
-	}
 
 	pipe_reference_init(&bo->reference, 1);
 	return bo;
@@ -128,7 +116,7 @@ void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, voi
 
 	if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
 		radeon_bo_map(radeon, bo->bo);
-		return (uint8_t *) bo->bo->data + bo->offset;
+		return (uint8_t *) bo->bo->data;
 	}
 
 	if (p_atomic_read(&bo->bo->reference.count) > 1) {
@@ -158,7 +146,7 @@ void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, voi
 	}
 
 out:
-	return (uint8_t *) bo->bo->data + bo->offset;
+	return (uint8_t *) bo->bo->data;
 }
 
 void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo)
@@ -168,12 +156,6 @@ void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo)
 
 void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo)
 {
-	if (bo->manager_id) {
-		if (!r600_bomgr_bo_destroy(radeon->bomgr, bo)) {
-			/* destroy is delayed by buffer manager */
-			return;
-		}
-	}
 	radeon_bo_reference(radeon, &bo->bo, NULL);
 	free(bo);
 }
diff --git a/src/gallium/winsys/r600/drm/r600_bomgr.c b/src/gallium/winsys/r600/drm/r600_bomgr.c
deleted file mode 100644
index 5cea7654d97..00000000000
--- a/src/gallium/winsys/r600/drm/r600_bomgr.c
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright 2010 VMWare.
- * Copyright 2010 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *      Jose Fonseca <jrfonseca-at-vmware-dot-com>
- *      Thomas Hellström <thomas-at-vmware-dot-com>
- *      Jerome Glisse <jglisse@redhat.com>
- */
-#include "r600_priv.h"
-#include "util/u_memory.h"
-#include "util/u_time.h"
-
-static void r600_bomgr_timeout_flush(struct r600_bomgr *mgr)
-{
-	struct r600_bo *bo, *tmp;
-	int64_t now;
-
-	now = os_time_get();
-	LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &mgr->delayed, list) {
-		if(!os_time_timeout(bo->start, bo->end, now))
-			break;
-
-		mgr->num_delayed--;
-		bo->manager_id = 0;
-		LIST_DEL(&bo->list);
-		r600_bo_destroy(mgr->radeon, bo);
-	}
-}
-
-static INLINE int r600_bo_is_compat(struct r600_bomgr *mgr,
-					struct r600_bo *bo,
-					unsigned size,
-					unsigned alignment,
-					unsigned cfence)
-{
-	if(bo->size < size) {
-		return 0;
-	}
-
-	/* be lenient with size */
-	if(bo->size >= 2*size) {
-		return 0;
-	}
-
-	if(!pb_check_alignment(alignment, bo->alignment)) {
-		return 0;
-	}
-
-	if (!fence_is_after(cfence, bo->fence)) {
-		return 0;
-	}
-
-	return 1;
-}
-
-struct r600_bo *r600_bomgr_bo_create(struct r600_bomgr *mgr,
-					unsigned size,
-					unsigned alignment,
-					unsigned cfence)
-{
-	struct r600_bo *bo, *tmp;
-	int64_t now;
-
-
-	pipe_mutex_lock(mgr->mutex);
-
-	now = os_time_get();
-	LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &mgr->delayed, list) {
-		if(r600_bo_is_compat(mgr, bo, size, alignment, cfence)) {
-			LIST_DEL(&bo->list);
-			--mgr->num_delayed;
-			r600_bomgr_timeout_flush(mgr);
-			pipe_mutex_unlock(mgr->mutex);
-			LIST_INITHEAD(&bo->list);
-			pipe_reference_init(&bo->reference, 1);
-			return bo;
-		}
-
-		if(os_time_timeout(bo->start, bo->end, now)) {
-			mgr->num_delayed--;
-			bo->manager_id = 0;
-			LIST_DEL(&bo->list);
-			r600_bo_destroy(mgr->radeon, bo);
-		}
-	}
-
-	pipe_mutex_unlock(mgr->mutex);
-	return NULL;
-}
-
-void r600_bomgr_bo_init(struct r600_bomgr *mgr, struct r600_bo *bo)
-{
-	LIST_INITHEAD(&bo->list);
-	bo->manager_id = 1;
-}
-
-boolean r600_bomgr_bo_destroy(struct r600_bomgr *mgr, struct r600_bo *bo)
-{
-	bo->start = os_time_get();
-	bo->end = bo->start + mgr->usecs;
-	pipe_mutex_lock(mgr->mutex);
-	LIST_ADDTAIL(&bo->list, &mgr->delayed);
-	++mgr->num_delayed;
-	pipe_mutex_unlock(mgr->mutex);
-	return FALSE;
-}
-
-void r600_bomgr_destroy(struct r600_bomgr *mgr)
-{
-	struct r600_bo *bo, *tmp;
-
-	pipe_mutex_lock(mgr->mutex);
-	LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &mgr->delayed, list) {
-		mgr->num_delayed--;
-		bo->manager_id = 0;
-		LIST_DEL(&bo->list);
-		r600_bo_destroy(mgr->radeon, bo);
-	}
-	pipe_mutex_unlock(mgr->mutex);
-
-	FREE(mgr);
-}
-
-struct r600_bomgr *r600_bomgr_create(struct radeon *radeon, unsigned usecs)
-{
-	struct r600_bomgr *mgr;
-
-	mgr = CALLOC_STRUCT(r600_bomgr);
-	if (mgr == NULL)
-		return NULL;
-
-	mgr->radeon = radeon;
-	mgr->usecs = usecs;
-	LIST_INITHEAD(&mgr->delayed);
-	mgr->num_delayed = 0;
-	pipe_mutex_init(mgr->mutex);
-
-	return mgr;
-}
diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c
index f2e46d3d0d9..390f51a561c 100644
--- a/src/gallium/winsys/r600/drm/r600_drm.c
+++ b/src/gallium/winsys/r600/drm/r600_drm.c
@@ -305,10 +305,6 @@ struct radeon *radeon_create(struct radeon_winsys *ws)
 		radeon_get_backend_map(radeon);
 	}
 
-	radeon->bomgr = r600_bomgr_create(radeon, 1000000);
-	if (radeon->bomgr == NULL) {
-		return NULL;
-	}
 	r = radeon_init_fence(radeon);
 	if (r) {
 		radeon_destroy(radeon);
@@ -327,9 +323,6 @@ struct radeon *radeon_destroy(struct radeon *radeon)
 		r600_bo_reference(radeon, &radeon->fence_bo, NULL);
 	}
 
-	if (radeon->bomgr)
-		r600_bomgr_destroy(radeon->bomgr);
-
 	FREE(radeon);
 	return NULL;
 }
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 621c7eb2158..99c238e04e6 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -34,7 +34,6 @@
 #define PKT_COUNT_C                     0xC000FFFF
 #define PKT_COUNT_S(x)                  (((x) & 0x3FFF) << 16)
 
-struct r600_bomgr;
 struct r600_bo;
 
 struct radeon {
@@ -43,7 +42,6 @@ struct radeon {
 	unsigned			family;
 	enum chip_class			chip_class;
 	struct r600_tiling_info		tiling_info;
-	struct r600_bomgr		*bomgr;
 	unsigned			fence;
 	unsigned			*cfence;
 	struct r600_bo			*fence_bo;
@@ -94,21 +92,6 @@ struct r600_bo {
 	unsigned			domains;
 	struct radeon_bo		*bo;
 	unsigned			fence;
-	/* manager data */
-	struct list_head		list;
-	unsigned			manager_id;
-	unsigned			alignment;
-	unsigned			offset;
-	int64_t				start;
-	int64_t				end;
-};
-
-struct r600_bomgr {
-	struct radeon			*radeon;
-	unsigned			usecs;
-	pipe_mutex			mutex;
-	struct list_head		delayed;
-	unsigned			num_delayed;
 };
 
 /*
@@ -174,23 +157,6 @@ static INLINE void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, str
  */
 void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo);
 
-/*
- * r600_bomgr.c
- */
-struct r600_bomgr *r600_bomgr_create(struct radeon *radeon, unsigned usecs);
-void r600_bomgr_destroy(struct r600_bomgr *mgr);
-boolean r600_bomgr_bo_destroy(struct r600_bomgr *mgr, struct r600_bo *bo);
-void r600_bomgr_bo_init(struct r600_bomgr *mgr, struct r600_bo *bo);
-struct r600_bo *r600_bomgr_bo_create(struct r600_bomgr *mgr,
-					unsigned size,
-					unsigned alignment,
-					unsigned cfence);
-
-
-/*
- * helpers
- */
-
 
 /*
  * radeon_bo.c

From cdbb8a195a3581faa7b569997fa84fb9bceffaa4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Wed, 3 Aug 2011 01:47:57 +0200
Subject: [PATCH 316/600] r600g: remove now-unused r600_bo::size

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/winsys/r600/drm/r600_bo.c   | 2 --
 src/gallium/winsys/r600/drm/r600_priv.h | 1 -
 2 files changed, 3 deletions(-)

diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c
index 4918897be67..9fc799a1586 100644
--- a/src/gallium/winsys/r600/drm/r600_bo.c
+++ b/src/gallium/winsys/r600/drm/r600_bo.c
@@ -66,7 +66,6 @@ struct r600_bo *r600_bo(struct radeon *radeon,
 	}
 
 	bo = calloc(1, sizeof(struct r600_bo));
-	bo->size = size;
 	bo->domains = domains;
 	bo->bo = rbo;
 
@@ -88,7 +87,6 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan
 	}
 
 	pipe_reference_init(&bo->reference, 1);
-	bo->size = rbo->size;
 	bo->domains = (RADEON_GEM_DOMAIN_CPU |
 			RADEON_GEM_DOMAIN_GTT |
 			RADEON_GEM_DOMAIN_VRAM);
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 99c238e04e6..0b757a84953 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -88,7 +88,6 @@ struct radeon_bo {
 struct r600_bo {
 	struct pipe_reference		reference; /* this must be the first member for the r600_bo_reference inline to work */
 	/* DO NOT MOVE THIS ^ */
-	unsigned			size;
 	unsigned			domains;
 	struct radeon_bo		*bo;
 	unsigned			fence;

From 591d8c33502463b816428d18ca779faa282a5a25 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Wed, 3 Aug 2011 01:59:02 +0200
Subject: [PATCH 317/600] r600g: remove the fences which were used for the
 cache buffer manager

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/winsys/r600/drm/r600_drm.c        | 23 ------
 src/gallium/winsys/r600/drm/r600_hw_context.c | 77 +------------------
 src/gallium/winsys/r600/drm/r600_priv.h       | 12 +--
 src/gallium/winsys/r600/drm/radeon_bo.c       | 43 ++---------
 4 files changed, 10 insertions(+), 145 deletions(-)

diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c
index 390f51a561c..f0ef55e98d5 100644
--- a/src/gallium/winsys/r600/drm/r600_drm.c
+++ b/src/gallium/winsys/r600/drm/r600_drm.c
@@ -221,21 +221,8 @@ static int radeon_get_backend_map(struct radeon *radeon)
 	return 0;
 }
 
-static int radeon_init_fence(struct radeon *radeon)
-{
-	radeon->fence = 1;
-	radeon->fence_bo = r600_bo(radeon, 4096, 0, 0, 0);
-	if (radeon->fence_bo == NULL) {
-		return -ENOMEM;
-	}
-	radeon->cfence = r600_bo_map(radeon, radeon->fence_bo, PIPE_TRANSFER_UNSYNCHRONIZED, NULL);
-	*radeon->cfence = 0;
-	return 0;
-}
-
 struct radeon *radeon_create(struct radeon_winsys *ws)
 {
-	int r;
 	struct radeon *radeon = CALLOC_STRUCT(radeon);
 	if (radeon == NULL) {
 		return NULL;
@@ -305,12 +292,6 @@ struct radeon *radeon_create(struct radeon_winsys *ws)
 		radeon_get_backend_map(radeon);
 	}
 
-	r = radeon_init_fence(radeon);
-	if (r) {
-		radeon_destroy(radeon);
-		return NULL;
-	}
-
 	return radeon;
 }
 
@@ -319,10 +300,6 @@ struct radeon *radeon_destroy(struct radeon *radeon)
 	if (radeon == NULL)
 		return NULL;
 
-	if (radeon->fence_bo) {
-		r600_bo_reference(radeon, &radeon->fence_bo, NULL);
-	}
-
 	FREE(radeon);
 	return NULL;
 }
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index 8ab4d94a6ee..e125fc82e3c 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -143,32 +143,6 @@ void r600_init_cs(struct r600_context *ctx)
 	ctx->init_dwords = ctx->pm4_cdwords;
 }
 
-static void INLINE r600_context_update_fenced_list(struct r600_context *ctx)
-{
-	for (int i = 0; i < ctx->creloc; i++) {
-		if (!LIST_IS_EMPTY(&ctx->bo[i]->fencedlist))
-			LIST_DELINIT(&ctx->bo[i]->fencedlist);
-		LIST_ADDTAIL(&ctx->bo[i]->fencedlist, &ctx->fenced_bo);
-		ctx->bo[i]->fence = ctx->radeon->fence;
-		ctx->bo[i]->ctx = ctx;
-	}
-}
-
-static void INLINE r600_context_fence_wraparound(struct r600_context *ctx, unsigned fence)
-{
-	struct radeon_bo *bo = NULL;
-	struct radeon_bo *tmp;
-
-	LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &ctx->fenced_bo, fencedlist) {
-		if (bo->fence <= *ctx->radeon->cfence) {
-			LIST_DELINIT(&bo->fencedlist);
-			bo->fence = 0;
-		} else {
-			bo->fence = fence;
-		}
-	}
-}
-
 static void r600_init_block(struct r600_context *ctx,
 			    struct r600_block *block,
 			    const struct r600_reg *reg, int index, int nreg,
@@ -757,17 +731,6 @@ static int r600_loop_const_init(struct r600_context *ctx, u32 offset)
 	return r600_context_add_block(ctx, r600_loop_consts, nreg, PKT3_SET_LOOP_CONST, R600_LOOP_CONST_OFFSET);
 }
 
-static void r600_context_clear_fenced_bo(struct r600_context *ctx)
-{
-	struct radeon_bo *bo, *tmp;
-
-	LIST_FOR_EACH_ENTRY_SAFE(bo, tmp, &ctx->fenced_bo, fencedlist) {
-		LIST_DELINIT(&bo->fencedlist);
-		bo->fence = 0;
-		bo->ctx = NULL;
-	}
-}
-
 static void r600_free_resource_range(struct r600_context *ctx, struct r600_range *range, int nblocks)
 {
 	struct r600_block *block;
@@ -817,7 +780,6 @@ void r600_context_fini(struct r600_context *ctx)
 	free(ctx->bo);
 	free(ctx->pm4);
 
-	r600_context_clear_fenced_bo(ctx);
 	memset(ctx, 0, sizeof(struct r600_context));
 }
 
@@ -1058,7 +1020,6 @@ void r600_context_get_reloc(struct r600_context *ctx, struct r600_bo *rbo)
 	ctx->reloc[ctx->creloc].write_domain = rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM);
 	ctx->reloc[ctx->creloc].flags = 0;
 	radeon_bo_reference(ctx->radeon, &ctx->bo[ctx->creloc], bo);
-	rbo->fence = ctx->radeon->fence;
 	ctx->creloc++;
 }
 
@@ -1138,7 +1099,6 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat
 			/* find relocation */
 			reloc_id = block->pm4_bo_index[id];
 			r600_bo_reference(ctx->radeon, &block->reloc[reloc_id].bo, reg->bo);
-			reg->bo->fence = ctx->radeon->fence;
 			/* always force dirty for relocs for now */
 			dirty |= R600_BLOCK_STATUS_DIRTY;
 		}
@@ -1205,31 +1165,21 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_
 				dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY;
 		}
 	}
-	if (!dirty) {
-		if (is_vertex)
-			state->bo[0]->fence = ctx->radeon->fence;
-		else {
-			state->bo[0]->fence = ctx->radeon->fence;
-			state->bo[1]->fence = ctx->radeon->fence;
-		}
-	} else {
+
+	if (dirty) {
 		if (is_vertex) {
 			/* VERTEX RESOURCE, we preted there is 2 bo to relocate so
 			 * we have single case btw VERTEX & TEXTURE resource
 			 */
 			r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->bo[0]);
 			r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL);
-			state->bo[0]->fence = ctx->radeon->fence;
 		} else {
 			/* TEXTURE RESOURCE */
 			r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->bo[0]);
 			r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->bo[1]);
-			state->bo[0]->fence = ctx->radeon->fence;
-			state->bo[1]->fence = ctx->radeon->fence;
 			state->bo[0]->bo->binding |= BO_BOUND_TEXTURE;
 		}
-	}
-	if (dirty) {
+
 		if (is_vertex)
 			block->status |= R600_BLOCK_STATUS_RESOURCE_VERTEX;
 		else
@@ -1574,7 +1524,6 @@ void r600_context_flush(struct r600_context *ctx)
 	struct drm_radeon_cs drmib = {};
 	struct drm_radeon_cs_chunk chunks[2];
 	uint64_t chunk_array[2];
-	unsigned fence;
 	int r;
 	struct r600_block *enable_block = NULL;
 
@@ -1592,16 +1541,6 @@ void r600_context_flush(struct r600_context *ctx)
 	/* partial flush is needed to avoid lockups on some chips with user fences */
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
 	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
-	/* emit fence */
-	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
-	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
-	ctx->pm4[ctx->pm4_cdwords++] = 0;
-	ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24);
-	ctx->pm4[ctx->pm4_cdwords++] = ctx->radeon->fence;
-	ctx->pm4[ctx->pm4_cdwords++] = 0;
-	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
-	ctx->pm4[ctx->pm4_cdwords++] = 0;
-	r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], ctx->radeon->fence_bo);
 
 #if 1
 	/* emit cs */
@@ -1625,16 +1564,6 @@ void r600_context_flush(struct r600_context *ctx)
 	*ctx->radeon->cfence = ctx->radeon->fence;
 #endif
 
-	r600_context_update_fenced_list(ctx);
-
-	fence = ctx->radeon->fence + 1;
-	if (fence < ctx->radeon->fence) {
-		/* wrap around */
-		fence = 1;
-		r600_context_fence_wraparound(ctx, fence);
-	}
-	ctx->radeon->fence = fence;
-
 	/* restart */
 	for (int i = 0; i < ctx->creloc; i++) {
 		ctx->bo[i]->reloc = NULL;
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 0b757a84953..49d3060bbf2 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -34,17 +34,12 @@
 #define PKT_COUNT_C                     0xC000FFFF
 #define PKT_COUNT_S(x)                  (((x) & 0x3FFF) << 16)
 
-struct r600_bo;
-
 struct radeon {
 	struct radeon_winsys		*ws;
 	struct radeon_info		info;
 	unsigned			family;
 	enum chip_class			chip_class;
 	struct r600_tiling_info		tiling_info;
-	unsigned			fence;
-	unsigned			*cfence;
-	struct r600_bo			*fence_bo;
 	unsigned			num_tile_pipes;
 	unsigned			backend_map;
 	boolean				backend_map_valid;
@@ -70,15 +65,11 @@ struct r600_reg {
 struct radeon_bo {
 	struct pipe_reference		reference;
 	struct pb_buffer		*buf;
-
 	unsigned			handle;
 	unsigned			size;
 	int				map_count;
 	void				*data;
-	struct list_head		fencedlist;
-	unsigned			fence;
-	struct r600_context		*ctx;
-	boolean				shared;
+
 	struct r600_reloc		*reloc;
 	unsigned			reloc_id;
 	unsigned			last_flush;
@@ -90,7 +81,6 @@ struct r600_bo {
 	/* DO NOT MOVE THIS ^ */
 	unsigned			domains;
 	struct radeon_bo		*bo;
-	unsigned			fence;
 };
 
 /*
diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c
index 7ff4806c056..63dc44ddb44 100644
--- a/src/gallium/winsys/r600/drm/radeon_bo.c
+++ b/src/gallium/winsys/r600/drm/radeon_bo.c
@@ -83,35 +83,24 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
 	if (bo == NULL) {
 		return NULL;
 	}
-	bo->size = size;
-	bo->handle = handle;
 	pipe_reference_init(&bo->reference, 1);
-	LIST_INITHEAD(&bo->fencedlist);
 
 	if (handle) {
-		unsigned size;
 		bo->buf = radeon->ws->buffer_from_handle(radeon->ws, &whandle, NULL, &size);
-		if (!bo->buf) {
-			FREE(bo);
-			return NULL;
-		}
-		bo->handle = radeon->ws->trans_get_buffer_handle(bo->buf);
-		bo->size = size;
-		bo->shared = TRUE;
 	} else {
 		bo->buf = radeon->ws->buffer_create(radeon->ws, size, alignment, bind, initial_domain);
-		if (!bo->buf) {
-			FREE(bo);
-			return NULL;
-		}
-		bo->handle = radeon->ws->trans_get_buffer_handle(bo->buf);
 	}
+	if (!bo->buf) {
+		FREE(bo);
+		return NULL;
+	}
+	bo->handle = radeon->ws->trans_get_buffer_handle(bo->buf);
+	bo->size = size;
 	return bo;
 }
 
 static void radeon_bo_destroy(struct radeon *radeon, struct radeon_bo *bo)
 {
-	LIST_DEL(&bo->fencedlist);
 	radeon_bo_fixed_unmap(radeon, bo);
 	pb_reference(&bo->buf, NULL);
 	FREE(bo);
@@ -133,16 +122,6 @@ int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo)
 	struct drm_radeon_gem_wait_idle args;
 	int ret;
 
-        if (!bo->shared) {
-                if (!bo->fence)
-			return 0;
-		if (bo->fence <= *radeon->cfence) {
-			LIST_DELINIT(&bo->fencedlist);
-			bo->fence = 0;
-			return 0;
-		}
-        }
-
 	/* Zero out args to make valgrind happy */
 	memset(&args, 0, sizeof(args));
 	args.handle = bo->handle;
@@ -158,16 +137,6 @@ int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain
 	struct drm_radeon_gem_busy args;
 	int ret;
 
-	if (!bo->shared) {
-		if (!bo->fence)
-			return 0;
-		if (bo->fence <= *radeon->cfence) {
-			LIST_DELINIT(&bo->fencedlist);
-			bo->fence = 0;
-			return 0;
-		}
-	}
-
 	memset(&args, 0, sizeof(args));
 	args.handle = bo->handle;
 	args.domain = 0;

From 9865b1ec8c88154d8ceb982c42844b68e9224217 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Wed, 3 Aug 2011 02:24:15 +0200
Subject: [PATCH 318/600] r600g: remove now-unused r600_context::fenced_bo

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/r600/r600.h                    | 2 +-
 src/gallium/winsys/r600/drm/evergreen_hw_context.c | 4 ----
 src/gallium/winsys/r600/drm/r600_hw_context.c      | 4 ----
 3 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 7e704730122..043215b3ec7 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -254,11 +254,11 @@ struct r600_context {
 	unsigned		creloc;
 	struct r600_reloc	*reloc;
 	struct radeon_bo	**bo;
+
 	u32			*pm4;
 	struct list_head	query_list;
 	unsigned		num_query_running;
 	unsigned		backend_mask;
-	struct list_head	fenced_bo;
 	unsigned                max_db; /* for OQ */
 	unsigned                num_dest_buffers;
 	unsigned		flags;
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
index 94206d5568b..df89047ebc5 100644
--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -1008,13 +1008,9 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon)
 	r600_init_cs(ctx);
 	/* save 16dwords space for fence mecanism */
 	ctx->pm4_ndwords -= 16;
-
 	ctx->max_db = 8;
 
-	LIST_INITHEAD(&ctx->fenced_bo);
-
 	r600_get_backend_mask(ctx);
-
 	return 0;
 out_err:
 	r600_context_fini(ctx);
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index e125fc82e3c..4d8bb184cbd 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -934,13 +934,9 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon)
 	r600_init_cs(ctx);
 	/* save 16dwords space for fence mecanism */
 	ctx->pm4_ndwords -= 16;
-
-	LIST_INITHEAD(&ctx->fenced_bo);
-
 	ctx->max_db = 4;
 
 	r600_get_backend_mask(ctx);
-
 	return 0;
 out_err:
 	r600_context_fini(ctx);

From 638d75185e66727faaba5dc2df1b6e14c7c0c075 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Wed, 3 Aug 2011 04:31:02 +0200
Subject: [PATCH 319/600] r600g: let radeon_winsys maintain the list of
 relocations

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/r600/r600.h               |  3 +-
 .../winsys/r600/drm/evergreen_hw_context.c    | 14 ++-----
 src/gallium/winsys/r600/drm/r600_hw_context.c | 37 +++++++----------
 src/gallium/winsys/r600/drm/r600_priv.h       |  1 +
 src/gallium/winsys/r600/drm/radeon_bo.c       |  1 +
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 40 +++++++++++++++----
 src/gallium/winsys/radeon/drm/radeon_winsys.h |  5 +++
 7 files changed, 59 insertions(+), 42 deletions(-)

diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 043215b3ec7..3ac60bce611 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -239,6 +239,7 @@ struct r600_query {
 
 struct r600_context {
 	struct radeon		*radeon;
+	struct radeon_winsys_cs	*cs;
 	struct r600_range	*range;
 	unsigned		nblocks;
 	struct r600_block	**blocks;
@@ -250,7 +251,7 @@ struct r600_context {
 	unsigned		pm4_dirty_cdwords;
 	unsigned		ctx_pm4_ndwords;
 	unsigned		init_dwords;
-	unsigned		nreloc;
+
 	unsigned		creloc;
 	struct r600_reloc	*reloc;
 	struct radeon_bo	**bo;
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
index df89047ebc5..3f6f8b5368b 100644
--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -986,14 +986,10 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon)
 	if (r)
 		goto out_err;
 
+	ctx->cs = radeon->ws->cs_create(radeon->ws);
+
 	/* allocate cs variables */
-	ctx->nreloc = RADEON_CTX_MAX_PM4;
-	ctx->reloc = calloc(ctx->nreloc, sizeof(struct r600_reloc));
-	if (ctx->reloc == NULL) {
-		r = -ENOMEM;
-		goto out_err;
-	}
-	ctx->bo = calloc(ctx->nreloc, sizeof(void *));
+	ctx->bo = calloc(RADEON_CTX_MAX_PM4, sizeof(void *));
 	if (ctx->bo == NULL) {
 		r = -ENOMEM;
 		goto out_err;
@@ -1146,10 +1142,6 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
 
 	if (draw->indices) {
 		ndwords = 11;
-		/* make sure there is enough relocation space before scheduling draw */
-		if (ctx->creloc >= (ctx->nreloc - 1)) {
-			r600_context_flush(ctx);
-		}
 	}
 
 	/* queries need some special values */
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index 4d8bb184cbd..bba55d68267 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -776,9 +776,9 @@ void r600_context_fini(struct r600_context *ctx)
 	r600_free_resource_range(ctx, &ctx->fs_resources, ctx->num_fs_resources);
 	free(ctx->range);
 	free(ctx->blocks);
-	free(ctx->reloc);
 	free(ctx->bo);
 	free(ctx->pm4);
+	ctx->radeon->ws->cs_destroy(ctx->cs);
 
 	memset(ctx, 0, sizeof(struct r600_context));
 }
@@ -912,14 +912,10 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon)
 	if (r)
 		goto out_err;
 
+	ctx->cs = radeon->ws->cs_create(radeon->ws);
+
 	/* allocate cs variables */
-	ctx->nreloc = RADEON_CTX_MAX_PM4;
-	ctx->reloc = calloc(ctx->nreloc, sizeof(struct r600_reloc));
-	if (ctx->reloc == NULL) {
-		r = -ENOMEM;
-		goto out_err;
-	}
-	ctx->bo = calloc(ctx->nreloc, sizeof(void *));
+	ctx->bo = calloc(RADEON_CTX_MAX_PM4, sizeof(void *));
 	if (ctx->bo == NULL) {
 		r = -ENOMEM;
 		goto out_err;
@@ -1009,14 +1005,15 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
 void r600_context_get_reloc(struct r600_context *ctx, struct r600_bo *rbo)
 {
 	struct radeon_bo *bo = rbo->bo;
-	bo->reloc = &ctx->reloc[ctx->creloc];
-	bo->reloc_id = ctx->creloc * sizeof(struct r600_reloc) / 4;
-	ctx->reloc[ctx->creloc].handle = bo->handle;
-	ctx->reloc[ctx->creloc].read_domain = rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM);
-	ctx->reloc[ctx->creloc].write_domain = rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM);
-	ctx->reloc[ctx->creloc].flags = 0;
-	radeon_bo_reference(ctx->radeon, &ctx->bo[ctx->creloc], bo);
-	ctx->creloc++;
+
+	unsigned reloc_index = ctx->radeon->ws->trans_add_reloc(ctx->cs, bo->cs_buf,
+								rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM),
+								rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM),
+								(void**)&ctx->reloc, &ctx->creloc);
+
+	bo->reloc = (void*)1;
+	bo->reloc_id = reloc_index * 4;
+	radeon_bo_reference(ctx->radeon, &ctx->bo[reloc_index], bo);
 }
 
 void r600_context_reg(struct r600_context *ctx,
@@ -1444,10 +1441,6 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
 
 	if (draw->indices) {
 		ndwords = 11;
-		/* make sure there is enough relocation space before scheduling draw */
-		if (ctx->creloc >= (ctx->nreloc - 1)) {
-			r600_context_flush(ctx);
-		}
 	}
 
 	/* queries need some special values */
@@ -1570,6 +1563,7 @@ void r600_context_flush(struct r600_context *ctx)
 	ctx->pm4_dirty_cdwords = 0;
 	ctx->pm4_cdwords = 0;
 	ctx->flags = 0;
+	ctx->radeon->ws->cs_flush(ctx->cs, 0);
 
 	r600_init_cs(ctx);
 
@@ -1601,8 +1595,7 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence_bo,
 {
 	unsigned ndwords = 10;
 
-	if (((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) ||
-	    (ctx->creloc >= (ctx->nreloc - 1))) {
+	if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
 		/* need to flush */
 		r600_context_flush(ctx);
 	}
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 49d3060bbf2..baf7c98d578 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -65,6 +65,7 @@ struct r600_reg {
 struct radeon_bo {
 	struct pipe_reference		reference;
 	struct pb_buffer		*buf;
+	struct radeon_winsys_cs_handle	*cs_buf;
 	unsigned			handle;
 	unsigned			size;
 	int				map_count;
diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c
index 63dc44ddb44..536bbe45bff 100644
--- a/src/gallium/winsys/r600/drm/radeon_bo.c
+++ b/src/gallium/winsys/r600/drm/radeon_bo.c
@@ -94,6 +94,7 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
 		FREE(bo);
 		return NULL;
 	}
+	bo->cs_buf = radeon->ws->buffer_get_cs_handle(bo->buf);
 	bo->handle = radeon->ws->trans_get_buffer_handle(bo->buf);
 	bo->size = size;
 	return bo;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 1b30b95a318..9a1e16957d2 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -219,11 +219,11 @@ int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
     return -1;
 }
 
-static void radeon_add_reloc(struct radeon_cs_context *csc,
-                             struct radeon_bo *bo,
-                             enum radeon_bo_domain rd,
-                             enum radeon_bo_domain wd,
-                             enum radeon_bo_domain *added_domains)
+static unsigned radeon_add_reloc(struct radeon_cs_context *csc,
+                                 struct radeon_bo *bo,
+                                 enum radeon_bo_domain rd,
+                                 enum radeon_bo_domain wd,
+                                 enum radeon_bo_domain *added_domains)
 {
     struct drm_radeon_cs_reloc *reloc;
     unsigned i;
@@ -233,7 +233,7 @@ static void radeon_add_reloc(struct radeon_cs_context *csc,
         reloc = csc->relocs_hashlist[hash];
         if (reloc->handle == bo->handle) {
             update_domains(reloc, rd, wd, added_domains);
-            return;
+            return csc->reloc_indices_hashlist[hash];
         }
 
         /* Hash collision, look for the BO in the list of relocs linearly. */
@@ -246,7 +246,7 @@ static void radeon_add_reloc(struct radeon_cs_context *csc,
                 csc->relocs_hashlist[hash] = reloc;
                 csc->reloc_indices_hashlist[hash] = i;
                 /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
-                return;
+                return i;
             }
         }
     }
@@ -280,9 +280,9 @@ static void radeon_add_reloc(struct radeon_cs_context *csc,
     csc->reloc_indices_hashlist[hash] = csc->crelocs;
 
     csc->chunks[1].length_dw += RELOC_DWORDS;
-    csc->crelocs++;
 
     *added_domains = rd | wd;
+    return csc->crelocs++;
 }
 
 static void radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
@@ -470,6 +470,28 @@ static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
     return radeon_bo_is_referenced_by_cs(cs, bo);
 }
 
+static unsigned trans_add_reloc(struct radeon_winsys_cs *rcs,
+				struct radeon_winsys_cs_handle *buf,
+				enum radeon_bo_domain rd,
+                                enum radeon_bo_domain wd,
+				void **reloc_list, unsigned *reloc_count)
+{
+	struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+        struct radeon_bo *bo = (struct radeon_bo*)buf;
+        enum radeon_bo_domain added_domains;
+
+        unsigned index = radeon_add_reloc(cs->csc, bo, rd, wd, &added_domains);
+
+        if (added_domains & RADEON_DOMAIN_GTT)
+            cs->csc->used_gart += bo->size;
+        if (added_domains & RADEON_DOMAIN_VRAM)
+            cs->csc->used_vram += bo->size;
+
+	*reloc_list = cs->csc->relocs;
+	*reloc_count = cs->csc->crelocs;
+	return index;
+}
+
 void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
 {
     ws->base.cs_create = radeon_drm_cs_create;
@@ -480,4 +502,6 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
     ws->base.cs_flush = radeon_drm_cs_flush;
     ws->base.cs_set_flush = radeon_drm_cs_set_flush;
     ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
+
+    ws->base.trans_add_reloc = trans_add_reloc;
 }
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index 2e5000702e0..8e81fa1e301 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -329,6 +329,11 @@ struct radeon_winsys {
 
     /* Transitional functions for r600g when moving to winsys/radeon */
     unsigned (*trans_get_buffer_handle)(struct pb_buffer *buf);
+    unsigned (*trans_add_reloc)(struct radeon_winsys_cs *cs,
+				struct radeon_winsys_cs_handle *buf,
+				enum radeon_bo_domain rd,
+                                enum radeon_bo_domain wd,
+				void **reloc_list, unsigned *reloc_count);
 };
 
 #endif

From 3e579722167dea997f547970e2b62d4594875b98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Wed, 3 Aug 2011 05:15:36 +0200
Subject: [PATCH 320/600] r600g: remove reloc-related variables from radeon_bo

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../winsys/r600/drm/evergreen_hw_context.c    |  3 +-
 src/gallium/winsys/r600/drm/r600_hw_context.c | 45 +++++--------------
 src/gallium/winsys/r600/drm/r600_priv.h       | 18 ++++----
 3 files changed, 22 insertions(+), 44 deletions(-)

diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
index 3f6f8b5368b..cd63c8db156 100644
--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -1192,8 +1192,7 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
 		pm4[7] = draw->vgt_num_indices;
 		pm4[8] = draw->vgt_draw_initiator;
 		pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing);
-		pm4[10] = 0;
-		r600_context_bo_reloc(ctx, &pm4[10], draw->indices);
+		pm4[10] = r600_context_bo_reloc(ctx, draw->indices);
 	} else {
 		pm4[4] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing);
 		pm4[5] = draw->vgt_num_indices;
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index bba55d68267..5dd079f62ac 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -86,8 +86,7 @@ void r600_get_backend_mask(struct r600_context *ctx)
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
-		ctx->pm4[ctx->pm4_cdwords++] = 0;
-		r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], buffer);
+		ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, buffer);
 
 		/* execute */
 		r600_context_flush(ctx);
@@ -997,25 +996,11 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
 		ctx->pm4[ctx->pm4_cdwords++] = 0x00000000;
 		ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A;
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing);
-		ctx->pm4[ctx->pm4_cdwords++] = bo->reloc_id;
+		ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, rbo);
 	}
 	bo->last_flush = (bo->last_flush | flush_flags) & flush_mask;
 }
 
-void r600_context_get_reloc(struct r600_context *ctx, struct r600_bo *rbo)
-{
-	struct radeon_bo *bo = rbo->bo;
-
-	unsigned reloc_index = ctx->radeon->ws->trans_add_reloc(ctx->cs, bo->cs_buf,
-								rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM),
-								rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM),
-								(void**)&ctx->reloc, &ctx->creloc);
-
-	bo->reloc = (void*)1;
-	bo->reloc_id = reloc_index * 4;
-	radeon_bo_reference(ctx->radeon, &ctx->bo[reloc_index], bo);
-}
-
 void r600_context_reg(struct r600_context *ctx,
 		      unsigned offset, unsigned value,
 		      unsigned mask)
@@ -1321,9 +1306,8 @@ void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *
 			if (block->pm4_bo_index[j]) {
 				/* find relocation */
 				id = block->pm4_bo_index[j];
-				r600_context_bo_reloc(ctx,
-						      &block->pm4[block->reloc[id].bo_pm4_index],
-						      block->reloc[id].bo);
+				block->pm4[block->reloc[id].bo_pm4_index] =
+					r600_context_bo_reloc(ctx, block->reloc[id].bo);
 				r600_context_bo_flush(ctx,
 						      block->reloc[id].flush_flags,
 						      block->reloc[id].flush_mask,
@@ -1376,9 +1360,8 @@ void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r60
 		if (block->pm4_bo_index[j]) {
 			/* find relocation */
 			id = block->pm4_bo_index[j];
-			r600_context_bo_reloc(ctx,
-					      &block->pm4[block->reloc[id].bo_pm4_index],
-					      block->reloc[id].bo);
+			block->pm4[block->reloc[id].bo_pm4_index] =
+				r600_context_bo_reloc(ctx, block->reloc[id].bo);
 			r600_context_bo_flush(ctx,
 					      block->reloc[id].flush_flags,
 					      block->reloc[id].flush_mask,
@@ -1493,8 +1476,7 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
 		pm4[7] = draw->vgt_num_indices;
 		pm4[8] = draw->vgt_draw_initiator;
 		pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing);
-		pm4[10] = 0;
-		r600_context_bo_reloc(ctx, &pm4[10], draw->indices);
+		pm4[10] = r600_context_bo_reloc(ctx, draw->indices);
 	} else {
 		pm4[4] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing);
 		pm4[5] = draw->vgt_num_indices;
@@ -1555,7 +1537,6 @@ void r600_context_flush(struct r600_context *ctx)
 
 	/* restart */
 	for (int i = 0; i < ctx->creloc; i++) {
-		ctx->bo[i]->reloc = NULL;
 		ctx->bo[i]->last_flush = 0;
 		radeon_bo_reference(ctx->radeon, &ctx->bo[i], NULL);
 	}
@@ -1609,8 +1590,7 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence_bo,
 	ctx->pm4[ctx->pm4_cdwords++] = value;                   /* DATA_LO */
 	ctx->pm4[ctx->pm4_cdwords++] = 0;                       /* DATA_HI */
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
-	ctx->pm4[ctx->pm4_cdwords++] = 0;
-	r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], fence_bo);
+	ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, fence_bo);
 }
 
 static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait)
@@ -1721,8 +1701,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 	}
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
-	ctx->pm4[ctx->pm4_cdwords++] = 0;
-	r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer);
+	ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer);
 
 	query->state |= R600_QUERY_STATE_STARTED;
 	query->state ^= R600_QUERY_STATE_ENDED;
@@ -1746,8 +1725,7 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query)
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 	}
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
-	ctx->pm4[ctx->pm4_cdwords++] = 0;
-	r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer);
+	ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer);
 
 	query->results_end += query->result_size;
 	if (query->results_end >= query->buffer_size)
@@ -1792,8 +1770,7 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query,
 			ctx->pm4[ctx->pm4_cdwords++] = results_base;
 			ctx->pm4[ctx->pm4_cdwords++] = op;
 			ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
-			ctx->pm4[ctx->pm4_cdwords++] = 0;
-			r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], query->buffer);
+			ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer);
 			results_base += query->result_size;
 			if (results_base >= query->buffer_size)
 				results_base = 0;
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index baf7c98d578..930cf81813a 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -30,6 +30,7 @@
 #include "../../radeon/drm/radeon_winsys.h"
 #include "util/u_hash_table.h"
 #include "os/os_thread.h"
+#include "radeon_drm.h"
 
 #define PKT_COUNT_C                     0xC000FFFF
 #define PKT_COUNT_S(x)                  (((x) & 0x3FFF) << 16)
@@ -71,8 +72,6 @@ struct radeon_bo {
 	int				map_count;
 	void				*data;
 
-	struct r600_reloc		*reloc;
-	unsigned			reloc_id;
 	unsigned			last_flush;
 	unsigned                        binding;
 };
@@ -111,7 +110,6 @@ int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo);
  * r600_hw_context.c
  */
 int r600_context_init_fence(struct r600_context *ctx);
-void r600_context_get_reloc(struct r600_context *ctx, struct r600_bo *rbo);
 void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
 				unsigned flush_mask, struct r600_bo *rbo);
 struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset);
@@ -129,17 +127,21 @@ void r600_context_reg(struct r600_context *ctx,
 void r600_init_cs(struct r600_context *ctx);
 int r600_resource_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride, struct r600_reg *reg, int nreg, unsigned offset_base);
 
-static INLINE void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct r600_bo *rbo)
+static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r600_bo *rbo)
 {
 	struct radeon_bo *bo = rbo->bo;
+	unsigned reloc_index;
 
 	assert(bo != NULL);
 
-	if (!bo->reloc)
-		r600_context_get_reloc(ctx, rbo);
+	reloc_index = ctx->radeon->ws->trans_add_reloc(
+				ctx->cs, bo->cs_buf,
+				rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM),
+				rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM),
+				(void**)&ctx->reloc, &ctx->creloc);
 
-	/* set PKT3 to point to proper reloc */
-	*pm4 = bo->reloc_id;
+	radeon_bo_reference(ctx->radeon, &ctx->bo[reloc_index], bo);
+	return reloc_index * 4;
 }
 
 /*

From 1acaf09778323ed6a2b0a0de5acb7731622a82bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Wed, 3 Aug 2011 19:27:49 +0200
Subject: [PATCH 321/600] r600g: don't use RADEON_GEM_DOMAIN_CPU

Also staging resources shouldn't be allocated with the initial domain
being VRAM.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/winsys/r600/drm/r600_bo.c   | 41 ++++++++++++-------------
 src/gallium/winsys/r600/drm/r600_priv.h |  3 +-
 2 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c
index 9fc799a1586..0e41a1709ee 100644
--- a/src/gallium/winsys/r600/drm/r600_bo.c
+++ b/src/gallium/winsys/r600/drm/r600_bo.c
@@ -40,26 +40,27 @@ struct r600_bo *r600_bo(struct radeon *radeon,
 	 * and are used for uploads and downloads from regular
 	 * resources.  We generate them internally for some transfers.
 	 */
-	if (usage == PIPE_USAGE_STAGING)
-		domains = RADEON_GEM_DOMAIN_CPU | RADEON_GEM_DOMAIN_GTT;
-	else
-		domains = (RADEON_GEM_DOMAIN_CPU |
-				RADEON_GEM_DOMAIN_GTT |
-				RADEON_GEM_DOMAIN_VRAM);
-
-	switch(usage) {
-	case PIPE_USAGE_DYNAMIC:
-	case PIPE_USAGE_STREAM:
-	case PIPE_USAGE_STAGING:
+	if (usage == PIPE_USAGE_STAGING) {
+		domains = RADEON_GEM_DOMAIN_GTT;
 		initial_domain = RADEON_GEM_DOMAIN_GTT;
-		break;
-	case PIPE_USAGE_DEFAULT:
-	case PIPE_USAGE_STATIC:
-	case PIPE_USAGE_IMMUTABLE:
-	default:
-		initial_domain = RADEON_GEM_DOMAIN_VRAM;
-		break;
+	} else {
+		domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
+
+		switch(usage) {
+		case PIPE_USAGE_DYNAMIC:
+		case PIPE_USAGE_STREAM:
+		case PIPE_USAGE_STAGING:
+			initial_domain = RADEON_GEM_DOMAIN_GTT;
+			break;
+		case PIPE_USAGE_DEFAULT:
+		case PIPE_USAGE_STATIC:
+		case PIPE_USAGE_IMMUTABLE:
+		default:
+			initial_domain = RADEON_GEM_DOMAIN_VRAM;
+			break;
+		}
 	}
+
 	rbo = radeon_bo(radeon, 0, size, alignment, binding, initial_domain);
 	if (rbo == NULL) {
 		return NULL;
@@ -87,9 +88,7 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan
 	}
 
 	pipe_reference_init(&bo->reference, 1);
-	bo->domains = (RADEON_GEM_DOMAIN_CPU |
-			RADEON_GEM_DOMAIN_GTT |
-			RADEON_GEM_DOMAIN_VRAM);
+	bo->domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
 
 	if (stride)
 		*stride = whandle->stride;
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 930cf81813a..5bb515d743e 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -136,8 +136,7 @@ static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r6
 
 	reloc_index = ctx->radeon->ws->trans_add_reloc(
 				ctx->cs, bo->cs_buf,
-				rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM),
-				rbo->domains & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM),
+				rbo->domains, rbo->domains,
 				(void**)&ctx->reloc, &ctx->creloc);
 
 	radeon_bo_reference(ctx->radeon, &ctx->bo[reloc_index], bo);

From 685b8345a8ff69460f0c62c678493527b59b18a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Thu, 4 Aug 2011 00:15:54 +0200
Subject: [PATCH 322/600] r600g: remove struct r600_reloc

That is really private to winsys/radeon.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/r600/r600.h               | 15 ++-------------
 src/gallium/winsys/r600/drm/r600_hw_context.c |  2 +-
 2 files changed, 3 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 3ac60bce611..0562b6da31e 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -190,18 +190,6 @@ struct r600_range {
 	struct r600_block	**blocks;
 };
 
-/*
- * relocation
- */
-#pragma pack(1)
-struct r600_reloc {
-	uint32_t	handle;
-	uint32_t	read_domain;
-	uint32_t	write_domain;
-	uint32_t	flags;
-};
-#pragma pack()
-
 /*
  * query
  */
@@ -240,6 +228,7 @@ struct r600_query {
 struct r600_context {
 	struct radeon		*radeon;
 	struct radeon_winsys_cs	*cs;
+
 	struct r600_range	*range;
 	unsigned		nblocks;
 	struct r600_block	**blocks;
@@ -253,7 +242,7 @@ struct r600_context {
 	unsigned		init_dwords;
 
 	unsigned		creloc;
-	struct r600_reloc	*reloc;
+	unsigned		*reloc;
 	struct radeon_bo	**bo;
 
 	u32			*pm4;
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index 5dd079f62ac..1950e583121 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -1521,7 +1521,7 @@ void r600_context_flush(struct r600_context *ctx)
 	chunks[0].length_dw = ctx->pm4_cdwords;
 	chunks[0].chunk_data = (uint64_t)(uintptr_t)ctx->pm4;
 	chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
-	chunks[1].length_dw = ctx->creloc * sizeof(struct r600_reloc) / 4;
+	chunks[1].length_dw = ctx->creloc * 4;
 	chunks[1].chunk_data = (uint64_t)(uintptr_t)ctx->reloc;
 	chunk_array[0] = (uint64_t)(uintptr_t)&chunks[0];
 	chunk_array[1] = (uint64_t)(uintptr_t)&chunks[1];

From e6fb62594fca714883af9bba9795be8838c16900 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Thu, 4 Aug 2011 01:37:33 +0200
Subject: [PATCH 323/600] r600g: emit CS using radeon_winsys

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/r600/r600.h               |  4 +-
 .../winsys/r600/drm/evergreen_hw_context.c    |  6 +--
 src/gallium/winsys/r600/drm/r600_drm.c        |  3 ++
 src/gallium/winsys/r600/drm/r600_hw_context.c | 39 ++++---------------
 src/gallium/winsys/r600/drm/r600_priv.h       |  9 +++--
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c |  5 +--
 src/gallium/winsys/radeon/drm/radeon_winsys.h |  3 +-
 7 files changed, 20 insertions(+), 49 deletions(-)

diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 0562b6da31e..84cfa2a17e6 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -236,16 +236,16 @@ struct r600_context {
 	struct list_head	resource_dirty;
 	struct list_head	enable_list;
 	unsigned		pm4_ndwords;
-	unsigned		pm4_cdwords;
 	unsigned		pm4_dirty_cdwords;
 	unsigned		ctx_pm4_ndwords;
 	unsigned		init_dwords;
 
 	unsigned		creloc;
-	unsigned		*reloc;
 	struct radeon_bo	**bo;
 
 	u32			*pm4;
+	unsigned		pm4_cdwords;
+
 	struct list_head	query_list;
 	unsigned		num_query_running;
 	unsigned		backend_mask;
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
index cd63c8db156..29da7bea4c6 100644
--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -995,11 +995,7 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon)
 		goto out_err;
 	}
 	ctx->pm4_ndwords = RADEON_CTX_MAX_PM4;
-	ctx->pm4 = calloc(ctx->pm4_ndwords, 4);
-	if (ctx->pm4 == NULL) {
-		r = -ENOMEM;
-		goto out_err;
-	}
+	ctx->pm4 = ctx->cs->buf;
 
 	r600_init_cs(ctx);
 	/* save 16dwords space for fence mecanism */
diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c
index f0ef55e98d5..270a07a3a89 100644
--- a/src/gallium/winsys/r600/drm/r600_drm.c
+++ b/src/gallium/winsys/r600/drm/r600_drm.c
@@ -292,6 +292,9 @@ struct radeon *radeon_create(struct radeon_winsys *ws)
 		radeon_get_backend_map(radeon);
 	}
 
+	/* XXX disable ioctl thread offloading until the porting is done. */
+	setenv("RADEON_THREAD", "0", 0);
+
 	return radeon;
 }
 
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index 1950e583121..59450b5ba83 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -776,7 +776,6 @@ void r600_context_fini(struct r600_context *ctx)
 	free(ctx->range);
 	free(ctx->blocks);
 	free(ctx->bo);
-	free(ctx->pm4);
 	ctx->radeon->ws->cs_destroy(ctx->cs);
 
 	memset(ctx, 0, sizeof(struct r600_context));
@@ -920,11 +919,7 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon)
 		goto out_err;
 	}
 	ctx->pm4_ndwords = RADEON_CTX_MAX_PM4;
-	ctx->pm4 = calloc(ctx->pm4_ndwords, 4);
-	if (ctx->pm4 == NULL) {
-		r = -ENOMEM;
-		goto out_err;
-	}
+	ctx->pm4 = ctx->cs->buf;
 
 	r600_init_cs(ctx);
 	/* save 16dwords space for fence mecanism */
@@ -1492,10 +1487,6 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
 
 void r600_context_flush(struct r600_context *ctx)
 {
-	struct drm_radeon_cs drmib = {};
-	struct drm_radeon_cs_chunk chunks[2];
-	uint64_t chunk_array[2];
-	int r;
 	struct r600_block *enable_block = NULL;
 
 	if (ctx->pm4_cdwords == ctx->init_dwords)
@@ -1513,27 +1504,12 @@ void r600_context_flush(struct r600_context *ctx)
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
 	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
 
-#if 1
-	/* emit cs */
-	drmib.num_chunks = 2;
-	drmib.chunks = (uint64_t)(uintptr_t)chunk_array;
-	chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
-	chunks[0].length_dw = ctx->pm4_cdwords;
-	chunks[0].chunk_data = (uint64_t)(uintptr_t)ctx->pm4;
-	chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
-	chunks[1].length_dw = ctx->creloc * 4;
-	chunks[1].chunk_data = (uint64_t)(uintptr_t)ctx->reloc;
-	chunk_array[0] = (uint64_t)(uintptr_t)&chunks[0];
-	chunk_array[1] = (uint64_t)(uintptr_t)&chunks[1];
-	r = drmCommandWriteRead(ctx->radeon->info.fd, DRM_RADEON_CS, &drmib,
-				sizeof(struct drm_radeon_cs));
-	if (r) {
-		fprintf(stderr, "radeon: The kernel rejected CS, "
-			"see dmesg for more information.\n");
-	}
-#else
-	*ctx->radeon->cfence = ctx->radeon->fence;
-#endif
+	/* Flush the CS. */
+	ctx->cs->cdw = ctx->pm4_cdwords;
+	ctx->radeon->ws->cs_flush(ctx->cs, 0);
+	/* We need to get the pointer to the other CS,
+	 * the command streams are double-buffered. */
+	ctx->pm4 = ctx->cs->buf;
 
 	/* restart */
 	for (int i = 0; i < ctx->creloc; i++) {
@@ -1544,7 +1520,6 @@ void r600_context_flush(struct r600_context *ctx)
 	ctx->pm4_dirty_cdwords = 0;
 	ctx->pm4_cdwords = 0;
 	ctx->flags = 0;
-	ctx->radeon->ws->cs_flush(ctx->cs, 0);
 
 	r600_init_cs(ctx);
 
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 5bb515d743e..83e964b7f69 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -134,10 +134,11 @@ static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r6
 
 	assert(bo != NULL);
 
-	reloc_index = ctx->radeon->ws->trans_add_reloc(
-				ctx->cs, bo->cs_buf,
-				rbo->domains, rbo->domains,
-				(void**)&ctx->reloc, &ctx->creloc);
+	reloc_index =
+		ctx->radeon->ws->trans_add_reloc(ctx->cs, bo->cs_buf, rbo->domains, rbo->domains);
+
+	if (reloc_index >= ctx->creloc)
+		ctx->creloc = reloc_index+1;
 
 	radeon_bo_reference(ctx->radeon, &ctx->bo[reloc_index], bo);
 	return reloc_index * 4;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 9a1e16957d2..09befb39b11 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -473,8 +473,7 @@ static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
 static unsigned trans_add_reloc(struct radeon_winsys_cs *rcs,
 				struct radeon_winsys_cs_handle *buf,
 				enum radeon_bo_domain rd,
-                                enum radeon_bo_domain wd,
-				void **reloc_list, unsigned *reloc_count)
+                                enum radeon_bo_domain wd)
 {
 	struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
         struct radeon_bo *bo = (struct radeon_bo*)buf;
@@ -487,8 +486,6 @@ static unsigned trans_add_reloc(struct radeon_winsys_cs *rcs,
         if (added_domains & RADEON_DOMAIN_VRAM)
             cs->csc->used_vram += bo->size;
 
-	*reloc_list = cs->csc->relocs;
-	*reloc_count = cs->csc->crelocs;
 	return index;
 }
 
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index 8e81fa1e301..073b6aad79a 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -332,8 +332,7 @@ struct radeon_winsys {
     unsigned (*trans_add_reloc)(struct radeon_winsys_cs *cs,
 				struct radeon_winsys_cs_handle *buf,
 				enum radeon_bo_domain rd,
-                                enum radeon_bo_domain wd,
-				void **reloc_list, unsigned *reloc_count);
+                                enum radeon_bo_domain wd);
 };
 
 #endif

From 03b25ad8ffd72f1f88b6c80a2ebfe3cf6e8a6390 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Thu, 4 Aug 2011 02:36:57 +0200
Subject: [PATCH 324/600] winsys/radeon: consolidate the add_reloc function

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/winsys/r600/drm/r600_priv.h       |  2 +-
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 36 ++++---------------
 src/gallium/winsys/radeon/drm/radeon_winsys.h | 13 +++----
 3 files changed, 13 insertions(+), 38 deletions(-)

diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 83e964b7f69..54b66cc9e35 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -135,7 +135,7 @@ static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r6
 	assert(bo != NULL);
 
 	reloc_index =
-		ctx->radeon->ws->trans_add_reloc(ctx->cs, bo->cs_buf, rbo->domains, rbo->domains);
+		ctx->radeon->ws->cs_add_reloc(ctx->cs, bo->cs_buf, rbo->domains, rbo->domains);
 
 	if (reloc_index >= ctx->creloc)
 		ctx->creloc = reloc_index+1;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 09befb39b11..6da0ae67743 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -285,24 +285,23 @@ static unsigned radeon_add_reloc(struct radeon_cs_context *csc,
     return csc->crelocs++;
 }
 
-static void radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
-                                    struct radeon_winsys_cs_handle *buf,
-                                    enum radeon_bo_domain rd,
-                                    enum radeon_bo_domain wd)
+static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
+                                        struct radeon_winsys_cs_handle *buf,
+                                        enum radeon_bo_domain rd,
+                                        enum radeon_bo_domain wd)
 {
     struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
     struct radeon_bo *bo = (struct radeon_bo*)buf;
     enum radeon_bo_domain added_domains;
 
-    radeon_add_reloc(cs->csc, bo, rd, wd, &added_domains);
-
-    if (!added_domains)
-        return;
+    unsigned index = radeon_add_reloc(cs->csc, bo, rd, wd, &added_domains);
 
     if (added_domains & RADEON_DOMAIN_GTT)
         cs->csc->used_gart += bo->size;
     if (added_domains & RADEON_DOMAIN_VRAM)
         cs->csc->used_vram += bo->size;
+
+    return index;
 }
 
 static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
@@ -470,25 +469,6 @@ static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
     return radeon_bo_is_referenced_by_cs(cs, bo);
 }
 
-static unsigned trans_add_reloc(struct radeon_winsys_cs *rcs,
-				struct radeon_winsys_cs_handle *buf,
-				enum radeon_bo_domain rd,
-                                enum radeon_bo_domain wd)
-{
-	struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
-        struct radeon_bo *bo = (struct radeon_bo*)buf;
-        enum radeon_bo_domain added_domains;
-
-        unsigned index = radeon_add_reloc(cs->csc, bo, rd, wd, &added_domains);
-
-        if (added_domains & RADEON_DOMAIN_GTT)
-            cs->csc->used_gart += bo->size;
-        if (added_domains & RADEON_DOMAIN_VRAM)
-            cs->csc->used_vram += bo->size;
-
-	return index;
-}
-
 void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
 {
     ws->base.cs_create = radeon_drm_cs_create;
@@ -499,6 +479,4 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
     ws->base.cs_flush = radeon_drm_cs_flush;
     ws->base.cs_set_flush = radeon_drm_cs_set_flush;
     ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
-
-    ws->base.trans_add_reloc = trans_add_reloc;
 }
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index 073b6aad79a..6360d6a8401 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -259,11 +259,12 @@ struct radeon_winsys {
      * \param buf A winsys buffer to validate.
      * \param rd  A read domain containing a bitmask of the RADEON_DOMAIN_* flags.
      * \param wd  A write domain containing a bitmask of the RADEON_DOMAIN_* flags.
+     * \return Relocation index.
      */
-    void (*cs_add_reloc)(struct radeon_winsys_cs *cs,
-                         struct radeon_winsys_cs_handle *buf,
-                         enum radeon_bo_domain rd,
-                         enum radeon_bo_domain wd);
+    unsigned (*cs_add_reloc)(struct radeon_winsys_cs *cs,
+                             struct radeon_winsys_cs_handle *buf,
+                             enum radeon_bo_domain rd,
+                             enum radeon_bo_domain wd);
 
     /**
      * Return TRUE if there is enough memory in VRAM and GTT for the relocs
@@ -329,10 +330,6 @@ struct radeon_winsys {
 
     /* Transitional functions for r600g when moving to winsys/radeon */
     unsigned (*trans_get_buffer_handle)(struct pb_buffer *buf);
-    unsigned (*trans_add_reloc)(struct radeon_winsys_cs *cs,
-				struct radeon_winsys_cs_handle *buf,
-				enum radeon_bo_domain rd,
-                                enum radeon_bo_domain wd);
 };
 
 #endif

From 1b542aca6e998e544a90ccff310f74b2811b8db0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Thu, 4 Aug 2011 03:01:44 +0200
Subject: [PATCH 325/600] r600g: move more DRM queries into winsys/radeon

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/winsys/r600/drm/r600_drm.c        | 45 +------------------
 src/gallium/winsys/r600/drm/r600_hw_context.c |  2 +-
 src/gallium/winsys/r600/drm/r600_priv.h       |  3 --
 .../winsys/radeon/drm/radeon_drm_winsys.c     | 19 +++++++-
 src/gallium/winsys/radeon/drm/radeon_winsys.h |  3 ++
 5 files changed, 24 insertions(+), 48 deletions(-)

diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c
index 270a07a3a89..a1b0ba1fb0f 100644
--- a/src/gallium/winsys/r600/drm/r600_drm.c
+++ b/src/gallium/winsys/r600/drm/r600_drm.c
@@ -68,12 +68,12 @@ unsigned r600_get_num_backends(struct radeon *radeon)
 
 unsigned r600_get_num_tile_pipes(struct radeon *radeon)
 {
-	return radeon->num_tile_pipes;
+	return radeon->info.r600_num_tile_pipes;
 }
 
 unsigned r600_get_backend_map(struct radeon *radeon)
 {
-	return radeon->backend_map;
+	return radeon->info.r600_backend_map;
 }
 
 unsigned r600_get_minor_version(struct radeon *radeon)
@@ -185,42 +185,6 @@ static int radeon_drm_get_tiling(struct radeon *radeon)
 	}
 }
 
-static int radeon_get_num_tile_pipes(struct radeon *radeon)
-{
-	struct drm_radeon_info info = {};
-	uint32_t num_tile_pipes = 0;
-	int r;
-
-	info.request = RADEON_INFO_NUM_TILE_PIPES;
-	info.value = (uintptr_t)&num_tile_pipes;
-	r = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_INFO, &info,
-			sizeof(struct drm_radeon_info));
-	if (r)
-		return r;
-
-	radeon->num_tile_pipes = num_tile_pipes;
-	return 0;
-}
-
-static int radeon_get_backend_map(struct radeon *radeon)
-{
-	struct drm_radeon_info info = {};
-	uint32_t backend_map = 0;
-	int r;
-
-	info.request = RADEON_INFO_BACKEND_MAP;
-	info.value = (uintptr_t)&backend_map;
-	r = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_INFO, &info,
-			sizeof(struct drm_radeon_info));
-	if (r)
-		return r;
-
-	radeon->backend_map = backend_map;
-	radeon->backend_map_valid = TRUE;
-
-	return 0;
-}
-
 struct radeon *radeon_create(struct radeon_winsys *ws)
 {
 	struct radeon *radeon = CALLOC_STRUCT(radeon);
@@ -287,11 +251,6 @@ struct radeon *radeon_create(struct radeon_winsys *ws)
 	if (radeon_drm_get_tiling(radeon))
 		return NULL;
 
-	if (radeon->info.drm_minor >= 11) {
-		radeon_get_num_tile_pipes(radeon);
-		radeon_get_backend_map(radeon);
-	}
-
 	/* XXX disable ioctl thread offloading until the porting is done. */
 	setenv("RADEON_THREAD", "0", 0);
 
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index 59450b5ba83..c72e8548de8 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -41,7 +41,7 @@ void r600_get_backend_mask(struct r600_context *ctx)
 	unsigned i, mask = 0;
 
 	/* if backend_map query is supported by the kernel */
-	if (ctx->radeon->backend_map_valid) {
+	if (ctx->radeon->info.r600_backend_map_valid) {
 		unsigned num_tile_pipes = r600_get_num_tile_pipes(ctx->radeon);
 		unsigned backend_map = r600_get_backend_map(ctx->radeon);
 		unsigned item_width, item_mask;
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 54b66cc9e35..036468e3a31 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -41,9 +41,6 @@ struct radeon {
 	unsigned			family;
 	enum chip_class			chip_class;
 	struct r600_tiling_info		tiling_info;
-	unsigned			num_tile_pipes;
-	unsigned			backend_map;
-	boolean				backend_map_valid;
 };
 
 /* these flags are used in register flags and added into block flags */
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 3be6e34f6f0..1f3bd6dd7bd 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -58,7 +58,15 @@
 #endif
 
 #ifndef RADEON_INFO_NUM_BACKENDS
-#define RADEON_INFO_NUM_BACKENDS 10
+#define RADEON_INFO_NUM_BACKENDS 0xa
+#endif
+
+#ifndef RADEON_INFO_NUM_TILE_PIPES
+#define RADEON_INFO_NUM_TILE_PIPES 0xb
+#endif
+
+#ifndef RADEON_INFO_BACKEND_MAP
+#define RADEON_INFO_BACKEND_MAP 0xd
 #endif
 
 /* Enable/disable feature access for one command stream.
@@ -240,6 +248,15 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
 
         radeon_get_drm_value(ws->fd, RADEON_INFO_TILING_CONFIG, NULL,
                              &ws->info.r600_tiling_config);
+
+        if (ws->info.drm_minor >= 11) {
+            radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL,
+                                 &ws->info.r600_num_tile_pipes);
+
+            if (radeon_get_drm_value(ws->fd, RADEON_INFO_BACKEND_MAP, NULL,
+                                      &ws->info.r600_backend_map))
+                ws->info.r600_backend_map_valid = TRUE;
+        }
     }
 
     return TRUE;
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index 6360d6a8401..dcb3f587a6e 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -85,6 +85,9 @@ struct radeon_info {
     uint32_t r600_num_backends;
     uint32_t r600_clock_crystal_freq;
     uint32_t r600_tiling_config;
+    uint32_t r600_num_tile_pipes;
+    uint32_t r600_backend_map;
+    boolean r600_backend_map_valid;
 };
 
 enum radeon_feature_id {

From ecfcf25387284f01131eeaf9ec3f72bc481f3cfe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Thu, 4 Aug 2011 03:07:42 +0200
Subject: [PATCH 326/600] r600g: get winsys_handle using radeon_winsys

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/winsys/r600/drm/r600_bo.c   | 15 +--------------
 src/gallium/winsys/r600/drm/r600_priv.h |  3 ---
 src/gallium/winsys/r600/drm/radeon_bo.c | 16 ----------------
 3 files changed, 1 insertion(+), 33 deletions(-)

diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c
index 0e41a1709ee..9e2c08ab56f 100644
--- a/src/gallium/winsys/r600/drm/r600_bo.c
+++ b/src/gallium/winsys/r600/drm/r600_bo.c
@@ -160,18 +160,5 @@ void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo)
 boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *bo,
 				unsigned stride, struct winsys_handle *whandle)
 {
-	whandle->stride = stride;
-	switch(whandle->type) {
-	case DRM_API_HANDLE_TYPE_KMS:
-		whandle->handle = bo->bo->handle;
-		break;
-	case DRM_API_HANDLE_TYPE_SHARED:
-		if (radeon_bo_get_name(radeon, bo->bo, &whandle->handle))
-			return FALSE;
-		break;
-	default:
-		return FALSE;
-	}
-
-	return TRUE;
+	return radeon->ws->buffer_get_handle(bo->bo->buf, stride, whandle);
 }
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 036468e3a31..0efb02aadae 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -98,9 +98,6 @@ int radeon_bo_fencelist(struct radeon *radeon, struct radeon_bo **bolist, uint32
 int radeon_bo_get_tiling_flags(struct radeon *radeon,
 			       struct radeon_bo *bo,
 			       uint32_t *tiling_flags);
-int radeon_bo_get_name(struct radeon *radeon,
-		       struct radeon_bo *bo,
-		       uint32_t *name);
 int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo);
 
 /*
diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c
index 536bbe45bff..9d351480d59 100644
--- a/src/gallium/winsys/r600/drm/radeon_bo.c
+++ b/src/gallium/winsys/r600/drm/radeon_bo.c
@@ -165,19 +165,3 @@ int radeon_bo_get_tiling_flags(struct radeon *radeon,
 	*tiling_flags = args.tiling_flags;
 	return ret;
 }
-
-int radeon_bo_get_name(struct radeon *radeon,
-		       struct radeon_bo *bo,
-		       uint32_t *name)
-{
-	struct drm_gem_flink flink;
-	int ret;
-
-	flink.handle = bo->handle;
-	ret = drmIoctl(radeon->info.fd, DRM_IOCTL_GEM_FLINK, &flink);
-	if (ret)
-		return ret;
-
-	*name = flink.name;
-	return ret;
-}

From 7ee65800c36a5ee3f6b8ec4ae2d4f15f91d4661b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Thu, 4 Aug 2011 03:19:33 +0200
Subject: [PATCH 327/600] r600g: get tiling flags using radeon_winsys

Also remove some unused fence-related leftovers.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/winsys/r600/drm/r600_bo.c   | 18 +++++++++---------
 src/gallium/winsys/r600/drm/r600_priv.h | 18 ------------------
 src/gallium/winsys/r600/drm/radeon_bo.c | 17 -----------------
 3 files changed, 9 insertions(+), 44 deletions(-)

diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c
index 9e2c08ab56f..0b2a9aabac3 100644
--- a/src/gallium/winsys/r600/drm/r600_bo.c
+++ b/src/gallium/winsys/r600/drm/r600_bo.c
@@ -79,7 +79,6 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan
 {
 	struct r600_bo *bo = calloc(1, sizeof(struct r600_bo));
 	struct radeon_bo *rbo;
-	unsigned tiling_flags;
 
 	rbo = bo->bo = radeon_bo(radeon, whandle->handle, 0, 0, 0, 0);
 	if (rbo == NULL) {
@@ -93,16 +92,17 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan
 	if (stride)
 		*stride = whandle->stride;
 
-	radeon_bo_get_tiling_flags(radeon, rbo, &tiling_flags);
 	if (array_mode) {
-		if (tiling_flags) {
-			if (tiling_flags & RADEON_TILING_MACRO)
-				*array_mode = V_0280A0_ARRAY_2D_TILED_THIN1;
-			else if (tiling_flags & RADEON_TILING_MICRO)
-				*array_mode = V_0280A0_ARRAY_1D_TILED_THIN1;
-		} else {
+		enum radeon_bo_layout micro, macro;
+
+		radeon->ws->buffer_get_tiling(rbo->buf, &micro, &macro);
+
+		if (macro == RADEON_LAYOUT_TILED)
+			*array_mode = V_0280A0_ARRAY_2D_TILED_THIN1;
+		else if (micro == RADEON_LAYOUT_TILED)
+			*array_mode = V_0280A0_ARRAY_1D_TILED_THIN1;
+		else
 			*array_mode = 0;
-		}
 	}
 	return bo;
 }
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 0efb02aadae..952f91df89a 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -94,16 +94,11 @@ void radeon_bo_reference(struct radeon *radeon, struct radeon_bo **dst,
 			 struct radeon_bo *src);
 int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo);
 int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain);
-int radeon_bo_fencelist(struct radeon *radeon, struct radeon_bo **bolist, uint32_t num_bo);
-int radeon_bo_get_tiling_flags(struct radeon *radeon,
-			       struct radeon_bo *bo,
-			       uint32_t *tiling_flags);
 int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo);
 
 /*
  * r600_hw_context.c
  */
-int r600_context_init_fence(struct r600_context *ctx);
 void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
 				unsigned flush_mask, struct r600_bo *rbo);
 struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset);
@@ -161,17 +156,4 @@ static inline void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo)
 	assert(bo->map_count >= 0);
 }
 
-/*
- * fence
- */
-static inline boolean fence_is_after(unsigned fence, unsigned ofence)
-{
-	/* handle wrap around */
-	if (fence < 0x80000000 && ofence > 0x80000000)
-		return TRUE;
-	if (fence > ofence)
-		return TRUE;
-	return FALSE;
-}
-
 #endif
diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c
index 9d351480d59..f2f53a14a58 100644
--- a/src/gallium/winsys/r600/drm/radeon_bo.c
+++ b/src/gallium/winsys/r600/drm/radeon_bo.c
@@ -148,20 +148,3 @@ int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain
 	*domain = args.domain;
 	return ret;
 }
-
-int radeon_bo_get_tiling_flags(struct radeon *radeon,
-			       struct radeon_bo *bo,
-			       uint32_t *tiling_flags)
-{
-	struct drm_radeon_gem_get_tiling args = {};
-	int ret;
-
-	args.handle = bo->handle;
-	ret = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_GEM_GET_TILING,
-				  &args, sizeof(args));
-	if (ret)
-		return ret;
-
-	*tiling_flags = args.tiling_flags;
-	return ret;
-}

From e2e1dc9e66ff348caa97b7b35e558d75c6cc6899 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Thu, 4 Aug 2011 03:38:20 +0200
Subject: [PATCH 328/600] r600g: set the flush callback in radeon_winsys

I have also renamed the winsys function.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/r300/r300_context.c       |  2 +-
 src/gallium/drivers/r600/r600.h               |  2 +-
 src/gallium/drivers/r600/r600_pipe.c          | 21 +++++++++++++---
 .../winsys/r600/drm/evergreen_hw_context.c    |  2 +-
 src/gallium/winsys/r600/drm/r600_hw_context.c | 25 ++++++++++---------
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c |  2 +-
 src/gallium/winsys/radeon/drm/radeon_winsys.h |  6 ++---
 7 files changed, 38 insertions(+), 22 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 5c222588e47..b304999d424 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -457,7 +457,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
     r300_init_render_functions(r300);
     r300_init_states(&r300->context);
 
-    rws->cs_set_flush(r300->cs, r300_flush_callback, r300);
+    rws->cs_set_flush_callback(r300->cs, r300_flush_callback, r300);
 
     /* The KIL opcode needs the first texture unit to be enabled
      * on r3xx-r4xx. In order to calm down the CS checker, we bind this
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 84cfa2a17e6..21b42736c6a 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -278,7 +278,7 @@ void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r6
 void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid);
 void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
 void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id);
-void r600_context_flush(struct r600_context *ctx);
+void r600_context_flush(struct r600_context *ctx, unsigned flags);
 void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw);
 
 struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type);
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 8a18207d1ea..e3e31982acd 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -114,8 +114,10 @@ static struct r600_fence *r600_create_fence(struct r600_pipe_context *ctx)
 	return fence;
 }
 
+
 static void r600_flush(struct pipe_context *ctx,
-			struct pipe_fence_handle **fence)
+		       struct pipe_fence_handle **fence,
+		       unsigned flags)
 {
 	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
 	struct r600_fence **rfence = (struct r600_fence**)fence;
@@ -123,7 +125,18 @@ static void r600_flush(struct pipe_context *ctx,
 	if (rfence)
 		*rfence = r600_create_fence(rctx);
 
-	r600_context_flush(&rctx->ctx);
+	r600_context_flush(&rctx->ctx, flags);
+}
+
+static void r600_flush_from_st(struct pipe_context *ctx,
+			       struct pipe_fence_handle **fence)
+{
+	r600_flush(ctx, fence, 0);
+}
+
+static void r600_flush_from_winsys(void *ctx, unsigned flags)
+{
+	r600_flush((struct pipe_context*)ctx, NULL, flags);
 }
 
 static void r600_update_num_contexts(struct r600_screen *rscreen, int diff)
@@ -194,7 +207,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
 	rctx->context.screen = screen;
 	rctx->context.priv = priv;
 	rctx->context.destroy = r600_destroy_context;
-	rctx->context.flush = r600_flush;
+	rctx->context.flush = r600_flush_from_st;
 
 	/* Easy accessing of screen/winsys. */
 	rctx->screen = rscreen;
@@ -244,6 +257,8 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
 		return NULL;
 	}
 
+	rctx->screen->ws->cs_set_flush_callback(rctx->ctx.cs, r600_flush_from_winsys, rctx);
+
 	util_slab_create(&rctx->pool_transfers,
 			 sizeof(struct pipe_transfer), 64,
 			 UTIL_SLAB_SINGLETHREADED);
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
index 29da7bea4c6..1d582ceeaa5 100644
--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -1158,7 +1158,7 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
 
 	if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
 		/* need to flush */
-		r600_context_flush(ctx);
+		r600_context_flush(ctx, 0);
 	}
 	/* at that point everythings is flushed and ctx->pm4_cdwords = 0 */
 	if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) {
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index c72e8548de8..760bfc591c0 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -89,7 +89,7 @@ void r600_get_backend_mask(struct r600_context *ctx)
 		ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, buffer);
 
 		/* execute */
-		r600_context_flush(ctx);
+		r600_context_flush(ctx, 0);
 
 		/* analyze results */
 		results = r600_bo_map(ctx->radeon, buffer, PB_USAGE_CPU_READ, NULL);
@@ -940,7 +940,7 @@ void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags)
 
 	if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
 		/* need to flush */
-		r600_context_flush(ctx);
+		r600_context_flush(ctx, 0);
 	}
 
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, ctx->predicate_drawing);
@@ -1441,7 +1441,7 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
 
 	if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
 		/* need to flush */
-		r600_context_flush(ctx);
+		r600_context_flush(ctx, 0);
 	}
 	/* at that point everythings is flushed and ctx->pm4_cdwords = 0 */
 	if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) {
@@ -1485,7 +1485,7 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
 	ctx->pm4_dirty_cdwords = 0;
 }
 
-void r600_context_flush(struct r600_context *ctx)
+void r600_context_flush(struct r600_context *ctx, unsigned flags)
 {
 	struct r600_block *enable_block = NULL;
 
@@ -1506,7 +1506,8 @@ void r600_context_flush(struct r600_context *ctx)
 
 	/* Flush the CS. */
 	ctx->cs->cdw = ctx->pm4_cdwords;
-	ctx->radeon->ws->cs_flush(ctx->cs, 0);
+	ctx->radeon->ws->cs_flush(ctx->cs, flags);
+
 	/* We need to get the pointer to the other CS,
 	 * the command streams are double-buffered. */
 	ctx->pm4 = ctx->cs->buf;
@@ -1553,7 +1554,7 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence_bo,
 
 	if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
 		/* need to flush */
-		r600_context_flush(ctx);
+		r600_context_flush(ctx, 0);
 	}
 
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
@@ -1615,7 +1616,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 
 	if ((required_space + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
 		/* need to flush */
-		r600_context_flush(ctx);
+		r600_context_flush(ctx, 0);
 	}
 
 	if (query->type == PIPE_QUERY_OCCLUSION_COUNTER) {
@@ -1626,7 +1627,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 			query->queries_emitted = 1;
 		} else {
 			if (++query->queries_emitted > query->buffer_size / query->result_size / 2)
-				r600_context_flush(ctx);
+				r600_context_flush(ctx, 0);
 		}
 	}
 
@@ -1637,7 +1638,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 	/* collect current results if query buffer is full */
 	if (new_results_end == query->results_start) {
 		if (!(query->state & R600_QUERY_STATE_FLUSHED))
-			r600_context_flush(ctx);
+			r600_context_flush(ctx, 0);
 		r600_query_result(ctx, query, TRUE);
 	}
 
@@ -1718,7 +1719,7 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query,
 {
 	if (operation == PREDICATION_OP_CLEAR) {
 		if (ctx->pm4_cdwords + 3 > ctx->pm4_ndwords)
-			r600_context_flush(ctx);
+			r600_context_flush(ctx, 0);
 
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
@@ -1734,7 +1735,7 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query,
 		count /= query->result_size;
 
 		if (ctx->pm4_cdwords + 5 * count > ctx->pm4_ndwords)
-			r600_context_flush(ctx);
+			r600_context_flush(ctx, 0);
 
 		op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
 				(flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
@@ -1807,7 +1808,7 @@ boolean r600_context_query_result(struct r600_context *ctx,
 	uint64_t *result = (uint64_t*)vresult;
 
 	if (!(query->state & R600_QUERY_STATE_FLUSHED)) {
-		r600_context_flush(ctx);
+		r600_context_flush(ctx, 0);
 	}
 	if (!r600_query_result(ctx, query, wait))
 		return FALSE;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 6da0ae67743..1ec324e5b74 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -477,6 +477,6 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
     ws->base.cs_validate = radeon_drm_cs_validate;
     ws->base.cs_write_reloc = radeon_drm_cs_write_reloc;
     ws->base.cs_flush = radeon_drm_cs_flush;
-    ws->base.cs_set_flush = radeon_drm_cs_set_flush;
+    ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush;
     ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
 }
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index dcb3f587a6e..47274055207 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -306,9 +306,9 @@ struct radeon_winsys {
      * \param flush     A flush callback function associated with the command stream.
      * \param user      A user pointer that will be passed to the flush callback.
      */
-    void (*cs_set_flush)(struct radeon_winsys_cs *cs,
-                         void (*flush)(void *ctx, unsigned flags),
-                         void *ctx);
+    void (*cs_set_flush_callback)(struct radeon_winsys_cs *cs,
+                                  void (*flush)(void *ctx, unsigned flags),
+                                  void *ctx);
 
     /**
      * Return TRUE if a buffer is referenced by a command stream.

From ab630b5768b0bfa4d7729d110ce4fb8f42e0cfb9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Thu, 4 Aug 2011 04:27:48 +0200
Subject: [PATCH 329/600] r600g: use buffer_map/unmap from radeon_winsys

This also drops the unneeded bo_busy/wait functions.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/r600/r600.h               |  3 +-
 src/gallium/drivers/r600/r600_asm.c           |  2 +-
 src/gallium/drivers/r600/r600_buffer.c        |  7 +-
 src/gallium/drivers/r600/r600_pipe.c          |  4 +-
 src/gallium/drivers/r600/r600_shader.c        |  2 +-
 src/gallium/drivers/r600/r600_texture.c       |  5 +-
 src/gallium/winsys/r600/drm/r600_bo.c         | 40 +----------
 src/gallium/winsys/r600/drm/r600_hw_context.c | 10 +--
 src/gallium/winsys/r600/drm/r600_priv.h       | 27 +-------
 src/gallium/winsys/r600/drm/radeon_bo.c       | 69 -------------------
 10 files changed, 23 insertions(+), 146 deletions(-)

diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 21b42736c6a..0c70fe2bb0a 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -95,13 +95,14 @@ unsigned r600_get_backend_map(struct radeon *radeon);
 
 /* r600_bo.c */
 struct r600_bo;
+struct radeon_winsys_cs;
 
 struct r600_bo *r600_bo(struct radeon *radeon,
 			unsigned size, unsigned alignment,
 			unsigned binding, unsigned usage);
 struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whandle,
 				unsigned *stride, unsigned *array_mode);
-void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx);
+void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, struct radeon_winsys_cs *cs, unsigned usage);
 void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo);
 boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *pb_bo,
 				  unsigned stride, struct winsys_handle *whandle);
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 24af9917a6f..f5244a723f4 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -2231,7 +2231,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
 		return -ENOMEM;
 	}
 
-	bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL);
+	bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, rctx->ctx.cs, PIPE_TRANSFER_WRITE);
 	if (bytecode == NULL) {
 		r600_bc_clear(&bc);
 		r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index 28d8c6af1cb..b8c6a419748 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -85,7 +85,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe,
 	if (rbuffer->r.b.user_ptr)
 		return (uint8_t*)rbuffer->r.b.user_ptr + transfer->box.x;
 
-	data = r600_bo_map(rctx->screen->radeon, rbuffer->r.bo, transfer->usage, pipe);
+	data = r600_bo_map(rctx->screen->radeon, rbuffer->r.bo, rctx->ctx.cs, transfer->usage);
 	if (!data)
 		return NULL;
 
@@ -134,9 +134,8 @@ static void r600_buffer_transfer_inline_write(struct pipe_context *pipe,
 
 	assert(rbuffer->r.b.user_ptr == NULL);
 
-	map = r600_bo_map(radeon, rbuffer->r.bo,
-			  PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage,
-			  pipe);
+	map = r600_bo_map(radeon, rbuffer->r.bo, rctx->ctx.cs,
+			  PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | usage);
 
 	memcpy(map + box->x, data, box->width);
 
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index e3e31982acd..1072ea0744d 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -54,6 +54,7 @@
  */
 static struct r600_fence *r600_create_fence(struct r600_pipe_context *ctx)
 {
+	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
 	struct r600_fence *fence = NULL;
 
 	if (!ctx->fences.bo) {
@@ -63,7 +64,8 @@ static struct r600_fence *r600_create_fence(struct r600_pipe_context *ctx)
 			R600_ERR("r600: failed to create bo for fence objects\n");
 			return NULL;
 		}
-		ctx->fences.data = r600_bo_map(ctx->radeon, ctx->fences.bo, PIPE_TRANSFER_UNSYNCHRONIZED, NULL);
+		ctx->fences.data = r600_bo_map(ctx->radeon, ctx->fences.bo, rctx->ctx.cs,
+					       PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_WRITE);
 	}
 
 	if (!LIST_IS_EMPTY(&ctx->fences.pool)) {
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 2551aa26f2a..0f226ebd52a 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -86,7 +86,7 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s
 		if (shader->bo == NULL) {
 			return -ENOMEM;
 		}
-		ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
+		ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, rctx->ctx.cs, PIPE_TRANSFER_WRITE);
 		if (R600_BIG_ENDIAN) {
 			for (i = 0; i < rshader->bc.ndw; ++i) {
 				ptr[i] = bswap_32(rshader->bc.bytecode[i]);
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 386d8f35015..f9f0d702008 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -682,10 +682,11 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx,
 void* r600_texture_transfer_map(struct pipe_context *ctx,
 				struct pipe_transfer* transfer)
 {
+	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
 	struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
 	struct r600_bo *bo;
 	enum pipe_format format = transfer->resource->format;
-	struct radeon *radeon = ((struct r600_screen*)ctx->screen)->radeon;
+	struct radeon *radeon = rctx->screen->radeon;
 	unsigned offset = 0;
 	char *map;
 
@@ -704,7 +705,7 @@ void* r600_texture_transfer_map(struct pipe_context *ctx,
 			transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
 	}
 
-	if (!(map = r600_bo_map(radeon, bo, transfer->usage, ctx))) {
+	if (!(map = r600_bo_map(radeon, bo, rctx->ctx.cs, transfer->usage))) {
 		return NULL;
 	}
 
diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c
index 0b2a9aabac3..184efcc0e9a 100644
--- a/src/gallium/winsys/r600/drm/r600_bo.c
+++ b/src/gallium/winsys/r600/drm/r600_bo.c
@@ -107,48 +107,14 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan
 	return bo;
 }
 
-void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx)
+void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, struct radeon_winsys_cs *cs, unsigned usage)
 {
-	struct pipe_context *pctx = ctx;
-
-	if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
-		radeon_bo_map(radeon, bo->bo);
-		return (uint8_t *) bo->bo->data;
-	}
-
-	if (p_atomic_read(&bo->bo->reference.count) > 1) {
-		if (usage & PIPE_TRANSFER_DONTBLOCK) {
-			return NULL;
-		}
-		if (ctx) {
-                        pctx->flush(pctx, NULL);
-		}
-	}
-
-	if (usage & PIPE_TRANSFER_DONTBLOCK) {
-		uint32_t domain;
-
-		if (radeon_bo_busy(radeon, bo->bo, &domain))
-			return NULL;
-		if (radeon_bo_map(radeon, bo->bo)) {
-			return NULL;
-		}
-		goto out;
-	}
-
-	radeon_bo_map(radeon, bo->bo);
-	if (radeon_bo_wait(radeon, bo->bo)) {
-		radeon_bo_unmap(radeon, bo->bo);
-		return NULL;
-	}
-
-out:
-	return (uint8_t *) bo->bo->data;
+	return radeon->ws->buffer_map(bo->bo->buf, cs, usage);
 }
 
 void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo)
 {
-	radeon_bo_unmap(radeon, bo->bo);
+	radeon->ws->buffer_unmap(bo->bo->buf);
 }
 
 void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo)
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index 760bfc591c0..0ebb5ed0385 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -74,7 +74,7 @@ void r600_get_backend_mask(struct r600_context *ctx)
 		goto err;
 
 	/* initialize buffer with zeroes */
-	results = r600_bo_map(ctx->radeon, buffer, PB_USAGE_CPU_WRITE, NULL);
+	results = r600_bo_map(ctx->radeon, buffer, ctx->cs, PIPE_TRANSFER_WRITE);
 	if (results) {
 		memset(results, 0, ctx->max_db * 4 * 4);
 		r600_bo_unmap(ctx->radeon, buffer);
@@ -92,7 +92,7 @@ void r600_get_backend_mask(struct r600_context *ctx)
 		r600_context_flush(ctx, 0);
 
 		/* analyze results */
-		results = r600_bo_map(ctx->radeon, buffer, PB_USAGE_CPU_READ, NULL);
+		results = r600_bo_map(ctx->radeon, buffer, ctx->cs, PIPE_TRANSFER_READ);
 		if (results) {
 			for(i = 0; i < ctx->max_db; i++) {
 				/* at least highest bit will be set if backend is used */
@@ -1576,9 +1576,9 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu
 	u32 *results, *current_result;
 
 	if (wait)
-		results = r600_bo_map(ctx->radeon, query->buffer, PIPE_TRANSFER_READ, NULL);
+		results = r600_bo_map(ctx->radeon, query->buffer, ctx->cs, PIPE_TRANSFER_READ);
 	else
-		results = r600_bo_map(ctx->radeon, query->buffer, PIPE_TRANSFER_DONTBLOCK | PIPE_TRANSFER_READ, NULL);
+		results = r600_bo_map(ctx->radeon, query->buffer, ctx->cs, PIPE_TRANSFER_DONTBLOCK | PIPE_TRANSFER_READ);
 	if (!results)
 		return FALSE;
 
@@ -1646,7 +1646,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 		u32 *results;
 		int i;
 
-		results = r600_bo_map(ctx->radeon, query->buffer, PIPE_TRANSFER_WRITE, NULL);
+		results = r600_bo_map(ctx->radeon, query->buffer, ctx->cs, PIPE_TRANSFER_WRITE);
 		if (results) {
 			results = (u32*)((char*)results + query->results_end);
 			memset(results, 0, query->result_size);
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 952f91df89a..90860f4e31b 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -66,8 +66,6 @@ struct radeon_bo {
 	struct radeon_winsys_cs_handle	*cs_buf;
 	unsigned			handle;
 	unsigned			size;
-	int				map_count;
-	void				*data;
 
 	unsigned			last_flush;
 	unsigned                        binding;
@@ -92,9 +90,6 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
 			    unsigned size, unsigned alignment, unsigned bind, unsigned initial_domain);
 void radeon_bo_reference(struct radeon *radeon, struct radeon_bo **dst,
 			 struct radeon_bo *src);
-int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo);
-int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain);
-int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo);
 
 /*
  * r600_hw_context.c
@@ -123,8 +118,8 @@ static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r6
 
 	assert(bo != NULL);
 
-	reloc_index =
-		ctx->radeon->ws->cs_add_reloc(ctx->cs, bo->cs_buf, rbo->domains, rbo->domains);
+	reloc_index = ctx->radeon->ws->cs_add_reloc(ctx->cs, bo->cs_buf,
+						    rbo->domains, rbo->domains);
 
 	if (reloc_index >= ctx->creloc)
 		ctx->creloc = reloc_index+1;
@@ -138,22 +133,4 @@ static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r6
  */
 void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo);
 
-
-/*
- * radeon_bo.c
- */
-static inline int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo)
-{
-	if (bo->map_count == 0 && !bo->data)
-		return radeon_bo_fixed_map(radeon, bo);
-	bo->map_count++;
-	return 0;
-}
-
-static inline void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo)
-{
-	bo->map_count--;
-	assert(bo->map_count >= 0);
-}
-
 #endif
diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c
index f2f53a14a58..14f8d340d67 100644
--- a/src/gallium/winsys/r600/drm/radeon_bo.c
+++ b/src/gallium/winsys/r600/drm/radeon_bo.c
@@ -32,43 +32,6 @@
 #include <sys/mman.h>
 #include <errno.h>
 
-int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo)
-{
-	struct drm_radeon_gem_mmap args;
-	void *ptr;
-	int r;
-
-	/* Zero out args to make valgrind happy */
-	memset(&args, 0, sizeof(args));
-	args.handle = bo->handle;
-	args.offset = 0;
-	args.size = (uint64_t)bo->size;
-	r = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_GEM_MMAP,
-				&args, sizeof(args));
-	if (r) {
-		fprintf(stderr, "error mapping %p 0x%08X (error = %d)\n",
-			bo, bo->handle, r);
-		return r;
-	}
-	ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, radeon->info.fd, args.addr_ptr);
-	if (ptr == MAP_FAILED) {
-		fprintf(stderr, "%s failed to map bo\n", __func__);
-		return -errno;
-	}
-	bo->data = ptr;
-
-	bo->map_count++;
-	return 0;
-}
-
-static void radeon_bo_fixed_unmap(struct radeon *radeon, struct radeon_bo *bo)
-{
-	if (bo->data) {
-		munmap(bo->data, bo->size);
-		bo->data = NULL;
-	}
-}
-
 #include "state_tracker/drm_driver.h"
 
 struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
@@ -102,7 +65,6 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
 
 static void radeon_bo_destroy(struct radeon *radeon, struct radeon_bo *bo)
 {
-	radeon_bo_fixed_unmap(radeon, bo);
 	pb_reference(&bo->buf, NULL);
 	FREE(bo);
 }
@@ -117,34 +79,3 @@ void radeon_bo_reference(struct radeon *radeon,
 	}
 	*dst = src;
 }
-
-int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo)
-{
-	struct drm_radeon_gem_wait_idle args;
-	int ret;
-
-	/* Zero out args to make valgrind happy */
-	memset(&args, 0, sizeof(args));
-	args.handle = bo->handle;
-	do {
-		ret = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_GEM_WAIT_IDLE,
-					&args, sizeof(args));
-	} while (ret == -EBUSY);
-	return ret;
-}
-
-int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain)
-{
-	struct drm_radeon_gem_busy args;
-	int ret;
-
-	memset(&args, 0, sizeof(args));
-	args.handle = bo->handle;
-	args.domain = 0;
-
-	ret = drmCommandWriteRead(radeon->info.fd, DRM_RADEON_GEM_BUSY,
-			&args, sizeof(args));
-
-	*domain = args.domain;
-	return ret;
-}

From 5229ba494b4b3f19085d13131a37626b914d4014 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Thu, 4 Aug 2011 05:40:16 +0200
Subject: [PATCH 330/600] r600g: remove radeon_bo::handle

This should be private to radeon_winsys.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/winsys/r600/drm/r600_hw_context.c | 6 +++---
 src/gallium/winsys/r600/drm/r600_priv.h       | 1 -
 src/gallium/winsys/r600/drm/radeon_bo.c       | 1 -
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 7 -------
 src/gallium/winsys/radeon/drm/radeon_winsys.h | 4 ----
 5 files changed, 3 insertions(+), 16 deletions(-)

diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index 0ebb5ed0385..b2da3eb0458 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -1130,11 +1130,11 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_
 
 	if (!dirty) {
 		if (is_vertex) {
-			if (block->reloc[1].bo->bo->handle != state->bo[0]->bo->handle)
+			if (block->reloc[1].bo->bo->buf != state->bo[0]->bo->buf)
 				dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY;
 		} else {
-			if ((block->reloc[1].bo->bo->handle != state->bo[0]->bo->handle) ||
-			    (block->reloc[2].bo->bo->handle != state->bo[1]->bo->handle))
+			if ((block->reloc[1].bo->bo->buf != state->bo[0]->bo->buf) ||
+			    (block->reloc[2].bo->bo->buf != state->bo[1]->bo->buf))
 				dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY;
 		}
 	}
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 90860f4e31b..1f311c4d5e3 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -64,7 +64,6 @@ struct radeon_bo {
 	struct pipe_reference		reference;
 	struct pb_buffer		*buf;
 	struct radeon_winsys_cs_handle	*cs_buf;
-	unsigned			handle;
 	unsigned			size;
 
 	unsigned			last_flush;
diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c
index 14f8d340d67..1d3766e55b5 100644
--- a/src/gallium/winsys/r600/drm/radeon_bo.c
+++ b/src/gallium/winsys/r600/drm/radeon_bo.c
@@ -58,7 +58,6 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
 		return NULL;
 	}
 	bo->cs_buf = radeon->ws->buffer_get_cs_handle(bo->buf);
-	bo->handle = radeon->ws->trans_get_buffer_handle(bo->buf);
 	bo->size = size;
 	return bo;
 }
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 58898d3423e..609a9065db8 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -617,11 +617,6 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
     return TRUE;
 }
 
-static unsigned trans_get_buffer_handle(struct pb_buffer *buf)
-{
-	return get_radeon_bo(buf)->handle;
-}
-
 void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
 {
     ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle;
@@ -634,6 +629,4 @@ void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
     ws->base.buffer_create = radeon_winsys_bo_create;
     ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
     ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
-
-    ws->base.trans_get_buffer_handle = trans_get_buffer_handle;
 }
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index 47274055207..9f70c5c2f9f 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -329,10 +329,6 @@ struct radeon_winsys {
     boolean (*cs_request_feature)(struct radeon_winsys_cs *cs,
                                   enum radeon_feature_id fid,
                                   boolean enable);
-
-
-    /* Transitional functions for r600g when moving to winsys/radeon */
-    unsigned (*trans_get_buffer_handle)(struct pb_buffer *buf);
 };
 
 #endif

From c6fec83726d3435a800f0a4e3ded89628b1a504f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Thu, 4 Aug 2011 06:11:45 +0200
Subject: [PATCH 331/600] r600g: merge radeon_bo with r600_bo

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/r600/r600.h               |  2 +-
 src/gallium/winsys/r600/drm/Makefile          |  1 -
 src/gallium/winsys/r600/drm/SConscript        |  1 -
 src/gallium/winsys/r600/drm/r600_bo.c         | 28 ++++---
 src/gallium/winsys/r600/drm/r600_hw_context.c | 21 +++--
 src/gallium/winsys/r600/drm/r600_priv.h       | 34 ++------
 src/gallium/winsys/r600/drm/radeon_bo.c       | 80 -------------------
 7 files changed, 33 insertions(+), 134 deletions(-)
 delete mode 100644 src/gallium/winsys/r600/drm/radeon_bo.c

diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 0c70fe2bb0a..a8626d1d2ec 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -242,7 +242,7 @@ struct r600_context {
 	unsigned		init_dwords;
 
 	unsigned		creloc;
-	struct radeon_bo	**bo;
+	struct r600_bo		**bo;
 
 	u32			*pm4;
 	unsigned		pm4_cdwords;
diff --git a/src/gallium/winsys/r600/drm/Makefile b/src/gallium/winsys/r600/drm/Makefile
index e5b58d6cf87..5ad183d78ae 100644
--- a/src/gallium/winsys/r600/drm/Makefile
+++ b/src/gallium/winsys/r600/drm/Makefile
@@ -6,7 +6,6 @@ LIBNAME = r600winsys
 
 C_SOURCES = \
 	evergreen_hw_context.c \
-	radeon_bo.c \
 	radeon_pciid.c \
 	r600_bo.c \
 	r600_drm.c \
diff --git a/src/gallium/winsys/r600/drm/SConscript b/src/gallium/winsys/r600/drm/SConscript
index 3665b6eaeef..ca51b52ea72 100644
--- a/src/gallium/winsys/r600/drm/SConscript
+++ b/src/gallium/winsys/r600/drm/SConscript
@@ -4,7 +4,6 @@ env = env.Clone()
 
 r600_sources = [
     'evergreen_hw_context.c',
-    'radeon_bo.c',
     'radeon_pciid.c',
     'r600_bo.c',
     'r600_drm.c',
diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c
index 184efcc0e9a..b40508665b7 100644
--- a/src/gallium/winsys/r600/drm/r600_bo.c
+++ b/src/gallium/winsys/r600/drm/r600_bo.c
@@ -33,7 +33,7 @@ struct r600_bo *r600_bo(struct radeon *radeon,
 			unsigned binding, unsigned usage)
 {
 	struct r600_bo *bo;
-	struct radeon_bo *rbo;
+	struct pb_buffer *pb;
 	uint32_t initial_domain, domains;
 	  
 	/* Staging resources particpate in transfers and blits only
@@ -61,14 +61,15 @@ struct r600_bo *r600_bo(struct radeon *radeon,
 		}
 	}
 
-	rbo = radeon_bo(radeon, 0, size, alignment, binding, initial_domain);
-	if (rbo == NULL) {
+	pb = radeon->ws->buffer_create(radeon->ws, size, alignment, binding, initial_domain);
+	if (!pb) {
 		return NULL;
 	}
 
 	bo = calloc(1, sizeof(struct r600_bo));
 	bo->domains = domains;
-	bo->bo = rbo;
+	bo->buf = pb;
+	bo->cs_buf = radeon->ws->buffer_get_cs_handle(pb);
 
 	pipe_reference_init(&bo->reference, 1);
 	return bo;
@@ -77,17 +78,18 @@ struct r600_bo *r600_bo(struct radeon *radeon,
 struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whandle,
 			       unsigned *stride, unsigned *array_mode)
 {
+	struct pb_buffer *pb;
 	struct r600_bo *bo = calloc(1, sizeof(struct r600_bo));
-	struct radeon_bo *rbo;
 
-	rbo = bo->bo = radeon_bo(radeon, whandle->handle, 0, 0, 0, 0);
-	if (rbo == NULL) {
+	pb = bo->buf = radeon->ws->buffer_from_handle(radeon->ws, whandle, stride, NULL);
+	if (!pb) {
 		free(bo);
 		return NULL;
 	}
 
 	pipe_reference_init(&bo->reference, 1);
 	bo->domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
+	bo->cs_buf = radeon->ws->buffer_get_cs_handle(pb);
 
 	if (stride)
 		*stride = whandle->stride;
@@ -95,7 +97,7 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan
 	if (array_mode) {
 		enum radeon_bo_layout micro, macro;
 
-		radeon->ws->buffer_get_tiling(rbo->buf, &micro, &macro);
+		radeon->ws->buffer_get_tiling(bo->buf, &micro, &macro);
 
 		if (macro == RADEON_LAYOUT_TILED)
 			*array_mode = V_0280A0_ARRAY_2D_TILED_THIN1;
@@ -109,22 +111,22 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan
 
 void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, struct radeon_winsys_cs *cs, unsigned usage)
 {
-	return radeon->ws->buffer_map(bo->bo->buf, cs, usage);
+	return radeon->ws->buffer_map(bo->buf, cs, usage);
 }
 
 void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo)
 {
-	radeon->ws->buffer_unmap(bo->bo->buf);
+	radeon->ws->buffer_unmap(bo->buf);
 }
 
 void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo)
 {
-	radeon_bo_reference(radeon, &bo->bo, NULL);
+	pb_reference(&bo->buf, NULL);
 	free(bo);
 }
 
 boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *bo,
-				unsigned stride, struct winsys_handle *whandle)
+				  unsigned stride, struct winsys_handle *whandle)
 {
-	return radeon->ws->buffer_get_handle(bo->bo->buf, stride, whandle);
+	return radeon->ws->buffer_get_handle(bo->buf, stride, whandle);
 }
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index b2da3eb0458..38713aad1fe 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -951,11 +951,8 @@ void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags)
 }
 
 void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
-				unsigned flush_mask, struct r600_bo *rbo)
+				unsigned flush_mask, struct r600_bo *bo)
 {
-	struct radeon_bo *bo;
-
-	bo = rbo->bo;
 	/* if bo has already been flushed */
 	if (!(~bo->last_flush & flush_flags)) {
 		bo->last_flush &= flush_mask;
@@ -987,11 +984,11 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
 	} else {
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, ctx->predicate_drawing);
 		ctx->pm4[ctx->pm4_cdwords++] = flush_flags;
-		ctx->pm4[ctx->pm4_cdwords++] = (bo->size + 255) >> 8;
+		ctx->pm4[ctx->pm4_cdwords++] = (bo->buf->base.size + 255) >> 8;
 		ctx->pm4[ctx->pm4_cdwords++] = 0x00000000;
 		ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A;
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing);
-		ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, rbo);
+		ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, bo);
 	}
 	bo->last_flush = (bo->last_flush | flush_flags) & flush_mask;
 }
@@ -1107,7 +1104,7 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_
 	if (state == NULL) {
 		block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_RESOURCE_DIRTY);
 		if (block->reloc[1].bo)
-			block->reloc[1].bo->bo->binding &= ~BO_BOUND_TEXTURE;
+			block->reloc[1].bo->binding &= ~BO_BOUND_TEXTURE;
 
 		r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL);
 		r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL);
@@ -1130,11 +1127,11 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_
 
 	if (!dirty) {
 		if (is_vertex) {
-			if (block->reloc[1].bo->bo->buf != state->bo[0]->bo->buf)
+			if (block->reloc[1].bo->buf != state->bo[0]->buf)
 				dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY;
 		} else {
-			if ((block->reloc[1].bo->bo->buf != state->bo[0]->bo->buf) ||
-			    (block->reloc[2].bo->bo->buf != state->bo[1]->bo->buf))
+			if ((block->reloc[1].bo->buf != state->bo[0]->buf) ||
+			    (block->reloc[2].bo->buf != state->bo[1]->buf))
 				dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY;
 		}
 	}
@@ -1150,7 +1147,7 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_
 			/* TEXTURE RESOURCE */
 			r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->bo[0]);
 			r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->bo[1]);
-			state->bo[0]->bo->binding |= BO_BOUND_TEXTURE;
+			state->bo[0]->binding |= BO_BOUND_TEXTURE;
 		}
 
 		if (is_vertex)
@@ -1515,7 +1512,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
 	/* restart */
 	for (int i = 0; i < ctx->creloc; i++) {
 		ctx->bo[i]->last_flush = 0;
-		radeon_bo_reference(ctx->radeon, &ctx->bo[i], NULL);
+		r600_bo_reference(ctx->radeon, &ctx->bo[i], NULL);
 	}
 	ctx->creloc = 0;
 	ctx->pm4_dirty_cdwords = 0;
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 1f311c4d5e3..82deeb8496e 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -60,21 +60,15 @@ struct r600_reg {
 };
 
 #define BO_BOUND_TEXTURE 1
-struct radeon_bo {
-	struct pipe_reference		reference;
-	struct pb_buffer		*buf;
-	struct radeon_winsys_cs_handle	*cs_buf;
-	unsigned			size;
-
-	unsigned			last_flush;
-	unsigned                        binding;
-};
 
 struct r600_bo {
 	struct pipe_reference		reference; /* this must be the first member for the r600_bo_reference inline to work */
 	/* DO NOT MOVE THIS ^ */
+	struct pb_buffer		*buf;
+	struct radeon_winsys_cs_handle	*cs_buf;
 	unsigned			domains;
-	struct radeon_bo		*bo;
+	unsigned			last_flush;
+	unsigned                        binding;
 };
 
 /*
@@ -82,14 +76,6 @@ struct r600_bo {
  */
 unsigned radeon_family_from_device(unsigned device);
 
-/*
- * radeon_bo.c
- */
-struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
-			    unsigned size, unsigned alignment, unsigned bind, unsigned initial_domain);
-void radeon_bo_reference(struct radeon *radeon, struct radeon_bo **dst,
-			 struct radeon_bo *src);
-
 /*
  * r600_hw_context.c
  */
@@ -112,18 +98,14 @@ int r600_resource_init(struct r600_context *ctx, struct r600_range *range, unsig
 
 static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r600_bo *rbo)
 {
-	struct radeon_bo *bo = rbo->bo;
-	unsigned reloc_index;
-
-	assert(bo != NULL);
-
-	reloc_index = ctx->radeon->ws->cs_add_reloc(ctx->cs, bo->cs_buf,
-						    rbo->domains, rbo->domains);
+	unsigned reloc_index =
+		ctx->radeon->ws->cs_add_reloc(ctx->cs, rbo->cs_buf,
+					      rbo->domains, rbo->domains);
 
 	if (reloc_index >= ctx->creloc)
 		ctx->creloc = reloc_index+1;
 
-	radeon_bo_reference(ctx->radeon, &ctx->bo[reloc_index], bo);
+	r600_bo_reference(ctx->radeon, &ctx->bo[reloc_index], rbo);
 	return reloc_index * 4;
 }
 
diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c
deleted file mode 100644
index 1d3766e55b5..00000000000
--- a/src/gallium/winsys/r600/drm/radeon_bo.c
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *      Jerome Glisse
- */
-#define _FILE_OFFSET_BITS 64
-#include "r600_priv.h"
-#include "util/u_hash_table.h"
-#include "util/u_memory.h"
-#include "radeon_drm.h"
-#include "xf86drm.h"
-#include <sys/mman.h>
-#include <errno.h>
-
-#include "state_tracker/drm_driver.h"
-
-struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
-			    unsigned size, unsigned alignment, unsigned bind,
-			    unsigned initial_domain)
-{
-	struct radeon_bo *bo;
-	struct winsys_handle whandle = {};
-	whandle.handle = handle;
-
-	bo = calloc(1, sizeof(*bo));
-	if (bo == NULL) {
-		return NULL;
-	}
-	pipe_reference_init(&bo->reference, 1);
-
-	if (handle) {
-		bo->buf = radeon->ws->buffer_from_handle(radeon->ws, &whandle, NULL, &size);
-	} else {
-		bo->buf = radeon->ws->buffer_create(radeon->ws, size, alignment, bind, initial_domain);
-	}
-	if (!bo->buf) {
-		FREE(bo);
-		return NULL;
-	}
-	bo->cs_buf = radeon->ws->buffer_get_cs_handle(bo->buf);
-	bo->size = size;
-	return bo;
-}
-
-static void radeon_bo_destroy(struct radeon *radeon, struct radeon_bo *bo)
-{
-	pb_reference(&bo->buf, NULL);
-	FREE(bo);
-}
-
-void radeon_bo_reference(struct radeon *radeon,
-			 struct radeon_bo **dst,
-			 struct radeon_bo *src)
-{
-	struct radeon_bo *old = *dst;
-	if (pipe_reference(&(*dst)->reference, &src->reference)) {
-		radeon_bo_destroy(radeon, old);
-	}
-	*dst = src;
-}

From 041ed559e11ee99d720c8132428c07d8fe57ec81 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Thu, 4 Aug 2011 06:17:39 +0200
Subject: [PATCH 332/600] r600g: remove an unused parameter from
 r600_bo_destroy

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/r600/r600.h               |  6 ++---
 src/gallium/drivers/r600/r600_asm.c           |  2 +-
 src/gallium/drivers/r600/r600_buffer.c        |  4 ++--
 src/gallium/drivers/r600/r600_pipe.c          |  2 +-
 src/gallium/drivers/r600/r600_shader.c        |  4 +---
 src/gallium/drivers/r600/r600_state_common.c  |  4 ++--
 src/gallium/drivers/r600/r600_texture.c       |  3 +--
 src/gallium/winsys/r600/drm/r600_bo.c         |  2 +-
 src/gallium/winsys/r600/drm/r600_hw_context.c | 24 +++++++++----------
 src/gallium/winsys/r600/drm/r600_priv.h       |  7 +-----
 10 files changed, 25 insertions(+), 33 deletions(-)

diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index a8626d1d2ec..b5d2d74628a 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -107,15 +107,15 @@ void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo);
 boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *pb_bo,
 				  unsigned stride, struct winsys_handle *whandle);
 
-void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo);
+void r600_bo_destroy(struct r600_bo *bo);
 
 /* this relies on the pipe_reference being the first member of r600_bo */
-static INLINE void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, struct r600_bo *src)
+static INLINE void r600_bo_reference(struct r600_bo **dst, struct r600_bo *src)
 {
 	struct r600_bo *old = *dst;
 
 	if (pipe_reference((struct pipe_reference *)(*dst), (struct pipe_reference *)src)) {
-		r600_bo_destroy(radeon, old);
+		r600_bo_destroy(old);
 	}
 	*dst = src;
 }
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index f5244a723f4..6092432e6f2 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -2234,7 +2234,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
 	bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, rctx->ctx.cs, PIPE_TRANSFER_WRITE);
 	if (bytecode == NULL) {
 		r600_bc_clear(&bc);
-		r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
+		r600_bo_reference(&ve->fetch_shader, NULL);
 		return -ENOMEM;
 	}
 
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index b8c6a419748..bc70578dc9f 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -46,7 +46,7 @@ static void r600_buffer_destroy(struct pipe_screen *screen,
 	struct r600_resource_buffer *rbuffer = r600_buffer(buf);
 
 	if (rbuffer->r.bo) {
-		r600_bo_reference(rscreen->radeon, &rbuffer->r.bo, NULL);
+		r600_bo_reference(&rbuffer->r.bo, NULL);
 	}
 	rbuffer->r.bo = NULL;
 	util_slab_free(&rscreen->pool_buffers, rbuffer);
@@ -230,7 +230,7 @@ struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen,
 
 	rbuffer = CALLOC_STRUCT(r600_resource);
 	if (rbuffer == NULL) {
-		r600_bo_reference(rw, &bo, NULL);
+		r600_bo_reference(&bo, NULL);
 		return NULL;
 	}
 
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 1072ea0744d..4051584f272 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -187,7 +187,7 @@ static void r600_destroy_context(struct pipe_context *context)
 		}
 
 		r600_bo_unmap(rctx->radeon, rctx->fences.bo);
-		r600_bo_reference(rctx->radeon, &rctx->fences.bo, NULL);
+		r600_bo_reference(&rctx->fences.bo, NULL);
 	}
 
 	r600_update_num_contexts(rctx->screen, -1);
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 0f226ebd52a..f86804eadcf 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -154,9 +154,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s
 
 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
 {
-	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
-
-	r600_bo_reference(rctx->radeon, &shader->bo, NULL);
+	r600_bo_reference(&shader->bo, NULL);
 	r600_bc_clear(&shader->shader.bc);
 
 	memset(&shader->shader,0,sizeof(struct r600_shader));
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 9f3ab89fdf7..2831517fe86 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -150,7 +150,7 @@ void r600_delete_state(struct pipe_context *ctx, void *state)
 		rctx->states[rstate->id] = NULL;
 	}
 	for (int i = 0; i < rstate->nregs; i++) {
-		r600_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL);
+		r600_bo_reference(&rstate->regs[i].bo, NULL);
 	}
 	free(rstate);
 }
@@ -181,7 +181,7 @@ void r600_delete_vertex_element(struct pipe_context *ctx, void *state)
 	if (rctx->vertex_elements == state)
 		rctx->vertex_elements = NULL;
 
-	r600_bo_reference(rctx->radeon, &v->fetch_shader, NULL);
+	r600_bo_reference(&v->fetch_shader, NULL);
 	u_vbuf_mgr_destroy_vertex_elements(rctx->vbuf_mgr, v->vmgr_elements);
 	FREE(state);
 }
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index f9f0d702008..ed0b2ec2890 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -339,13 +339,12 @@ static void r600_texture_destroy(struct pipe_screen *screen,
 {
 	struct r600_resource_texture *rtex = (struct r600_resource_texture*)ptex;
 	struct r600_resource *resource = &rtex->resource;
-	struct radeon *radeon = ((struct r600_screen*)screen)->radeon;
 
 	if (rtex->flushed_depth_texture)
 		pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL);
 
 	if (resource->bo) {
-		r600_bo_reference(radeon, &resource->bo, NULL);
+		r600_bo_reference(&resource->bo, NULL);
 	}
 	FREE(rtex);
 }
diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c
index b40508665b7..123f718e664 100644
--- a/src/gallium/winsys/r600/drm/r600_bo.c
+++ b/src/gallium/winsys/r600/drm/r600_bo.c
@@ -119,7 +119,7 @@ void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo)
 	radeon->ws->buffer_unmap(bo->buf);
 }
 
-void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo)
+void r600_bo_destroy(struct r600_bo *bo)
 {
 	pb_reference(&bo->buf, NULL);
 	free(bo);
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index 38713aad1fe..f39fc69aee7 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -103,7 +103,7 @@ void r600_get_backend_mask(struct r600_context *ctx)
 		}
 	}
 
-	r600_bo_reference(ctx->radeon, &buffer, NULL);
+	r600_bo_reference(&buffer, NULL);
 
 	if (mask != 0) {
 		ctx->backend_mask = mask;
@@ -738,7 +738,7 @@ static void r600_free_resource_range(struct r600_context *ctx, struct r600_range
 		block = range->blocks[i];
 		if (block) {
 			for (int k = 1; k <= block->nbo; k++)
-				r600_bo_reference(ctx->radeon, &block->reloc[k].bo, NULL);
+				r600_bo_reference(&block->reloc[k].bo, NULL);
 			free(block);
 		}
 	}
@@ -763,7 +763,7 @@ void r600_context_fini(struct r600_context *ctx)
 					range->blocks[CTX_BLOCK_ID(offset)] = NULL;
 				}
 				for (int k = 1; k <= block->nbo; k++) {
-					r600_bo_reference(ctx->radeon, &block->reloc[k].bo, NULL);
+					r600_bo_reference(&block->reloc[k].bo, NULL);
 				}
 				free(block);
 			}
@@ -1068,7 +1068,7 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat
 		if (block->pm4_bo_index[id]) {
 			/* find relocation */
 			reloc_id = block->pm4_bo_index[id];
-			r600_bo_reference(ctx->radeon, &block->reloc[reloc_id].bo, reg->bo);
+			r600_bo_reference(&block->reloc[reloc_id].bo, reg->bo);
 			/* always force dirty for relocs for now */
 			dirty |= R600_BLOCK_STATUS_DIRTY;
 		}
@@ -1106,8 +1106,8 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_
 		if (block->reloc[1].bo)
 			block->reloc[1].bo->binding &= ~BO_BOUND_TEXTURE;
 
-		r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL);
-		r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL);
+		r600_bo_reference(&block->reloc[1].bo, NULL);
+		r600_bo_reference(&block->reloc[2].bo, NULL);
 		LIST_DELINIT(&block->list);
 		LIST_DELINIT(&block->enable_list);
 		return;
@@ -1141,12 +1141,12 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_
 			/* VERTEX RESOURCE, we preted there is 2 bo to relocate so
 			 * we have single case btw VERTEX & TEXTURE resource
 			 */
-			r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->bo[0]);
-			r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL);
+			r600_bo_reference(&block->reloc[1].bo, state->bo[0]);
+			r600_bo_reference(&block->reloc[2].bo, NULL);
 		} else {
 			/* TEXTURE RESOURCE */
-			r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->bo[0]);
-			r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->bo[1]);
+			r600_bo_reference(&block->reloc[1].bo, state->bo[0]);
+			r600_bo_reference(&block->reloc[2].bo, state->bo[1]);
 			state->bo[0]->binding |= BO_BOUND_TEXTURE;
 		}
 
@@ -1512,7 +1512,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
 	/* restart */
 	for (int i = 0; i < ctx->creloc; i++) {
 		ctx->bo[i]->last_flush = 0;
-		r600_bo_reference(ctx->radeon, &ctx->bo[i], NULL);
+		r600_bo_reference(&ctx->bo[i], NULL);
 	}
 	ctx->creloc = 0;
 	ctx->pm4_dirty_cdwords = 0;
@@ -1793,7 +1793,7 @@ struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned
 
 void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query)
 {
-	r600_bo_reference(ctx->radeon, &query->buffer, NULL);
+	r600_bo_reference(&query->buffer, NULL);
 	LIST_DELINIT(&query->list);
 	free(query);
 }
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index 82deeb8496e..df412a08144 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -105,13 +105,8 @@ static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r6
 	if (reloc_index >= ctx->creloc)
 		ctx->creloc = reloc_index+1;
 
-	r600_bo_reference(ctx->radeon, &ctx->bo[reloc_index], rbo);
+	r600_bo_reference(&ctx->bo[reloc_index], rbo);
 	return reloc_index * 4;
 }
 
-/*
- * r600_bo.c
- */
-void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo);
-
 #endif

From efbccfeca071b052bb8da0a7f0277000869b2ea1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Thu, 4 Aug 2011 06:19:17 +0200
Subject: [PATCH 333/600] winsys/radeon: remove the device file descriptor from
 the interface

r600g doesn't need it anymore.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 1 -
 src/gallium/winsys/radeon/drm/radeon_winsys.h     | 1 -
 2 files changed, 2 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 1f3bd6dd7bd..e234321d934 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -316,7 +316,6 @@ struct radeon_winsys *radeon_drm_winsys_create(int fd)
     }
 
     ws->fd = fd;
-    ws->info.fd = fd;
 
     if (!do_winsys_init(ws))
         goto fail;
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index 9f70c5c2f9f..bf5b144fe2c 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -73,7 +73,6 @@ struct radeon_info {
     uint32_t pci_id;
     uint32_t gart_size;
     uint32_t vram_size;
-    uint32_t fd; /* XXX transitional */
 
     uint32_t drm_major; /* version */
     uint32_t drm_minor;

From d6da5185f96c1a85390e08dc2ef36c04d6e0de11 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Thu, 4 Aug 2011 06:23:59 +0200
Subject: [PATCH 334/600] r600g: don't include radeon_drm.h and xf86drm.h

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/r600/r600_buffer.c             |  3 ---
 src/gallium/winsys/r600/drm/evergreen_hw_context.c |  2 --
 src/gallium/winsys/r600/drm/r600_bo.c              | 13 ++++++-------
 src/gallium/winsys/r600/drm/r600_drm.c             |  2 --
 src/gallium/winsys/r600/drm/r600_hw_context.c      |  2 --
 src/gallium/winsys/r600/drm/r600_priv.h            |  1 -
 6 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index bc70578dc9f..ca2415adb28 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -33,9 +33,6 @@
 #include <util/u_memory.h>
 #include "util/u_upload_mgr.h"
 
-#include <xf86drm.h>
-#include "radeon_drm.h"
-
 #include "r600.h"
 #include "r600_pipe.h"
 
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
index 1d582ceeaa5..54e26b3f0e9 100644
--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -27,8 +27,6 @@
 #include "r600_priv.h"
 #include "evergreend.h"
 #include "util/u_memory.h"
-#include "radeon_drm.h"
-#include "xf86drm.h"
 #include <errno.h>
 
 #define GROUP_FORCE_NEW_BLOCK	0
diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c
index 123f718e664..4beedad233e 100644
--- a/src/gallium/winsys/r600/drm/r600_bo.c
+++ b/src/gallium/winsys/r600/drm/r600_bo.c
@@ -26,7 +26,6 @@
 #include "r600_priv.h"
 #include "r600d.h"
 #include "state_tracker/drm_driver.h"
-#include "radeon_drm.h"
 
 struct r600_bo *r600_bo(struct radeon *radeon,
 			unsigned size, unsigned alignment,
@@ -41,22 +40,22 @@ struct r600_bo *r600_bo(struct radeon *radeon,
 	 * resources.  We generate them internally for some transfers.
 	 */
 	if (usage == PIPE_USAGE_STAGING) {
-		domains = RADEON_GEM_DOMAIN_GTT;
-		initial_domain = RADEON_GEM_DOMAIN_GTT;
+		domains = RADEON_DOMAIN_GTT;
+		initial_domain = RADEON_DOMAIN_GTT;
 	} else {
-		domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
+		domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
 
 		switch(usage) {
 		case PIPE_USAGE_DYNAMIC:
 		case PIPE_USAGE_STREAM:
 		case PIPE_USAGE_STAGING:
-			initial_domain = RADEON_GEM_DOMAIN_GTT;
+			initial_domain = RADEON_DOMAIN_GTT;
 			break;
 		case PIPE_USAGE_DEFAULT:
 		case PIPE_USAGE_STATIC:
 		case PIPE_USAGE_IMMUTABLE:
 		default:
-			initial_domain = RADEON_GEM_DOMAIN_VRAM;
+			initial_domain = RADEON_DOMAIN_VRAM;
 			break;
 		}
 	}
@@ -88,7 +87,7 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, struct winsys_handle *whan
 	}
 
 	pipe_reference_init(&bo->reference, 1);
-	bo->domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
+	bo->domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
 	bo->cs_buf = radeon->ws->buffer_get_cs_handle(pb);
 
 	if (stride)
diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c
index a1b0ba1fb0f..17d098aed96 100644
--- a/src/gallium/winsys/r600/drm/r600_drm.c
+++ b/src/gallium/winsys/r600/drm/r600_drm.c
@@ -29,8 +29,6 @@
 #include "r600_priv.h"
 #include "r600_drm_public.h"
 #include "util/u_memory.h"
-#include <radeon_drm.h>
-#include <xf86drm.h>
 #include <errno.h>
 
 #ifndef RADEON_INFO_NUM_TILE_PIPES
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index f39fc69aee7..577988d37ea 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -26,8 +26,6 @@
 #include "r600_priv.h"
 #include "r600d.h"
 #include "util/u_memory.h"
-#include "radeon_drm.h"
-#include "xf86drm.h"
 #include <errno.h>
 
 #define GROUP_FORCE_NEW_BLOCK	0
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index df412a08144..c5b82fd43ae 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -30,7 +30,6 @@
 #include "../../radeon/drm/radeon_winsys.h"
 #include "util/u_hash_table.h"
 #include "os/os_thread.h"
-#include "radeon_drm.h"
 
 #define PKT_COUNT_C                     0xC000FFFF
 #define PKT_COUNT_S(x)                  (((x) & 0x3FFF) << 16)

From 0bbbd82488c11710aaca79ed3db2b605888ac65d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Thu, 4 Aug 2011 06:33:04 +0200
Subject: [PATCH 335/600] r600g: undefine RADEON_CTX_MAX_PM4

winsys/radeon has its own definition.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/r600/r600.h                    | 2 --
 src/gallium/winsys/r600/drm/evergreen_hw_context.c | 6 +++---
 src/gallium/winsys/r600/drm/r600_hw_context.c      | 6 +++---
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index b5d2d74628a..232912f914d 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -29,8 +29,6 @@
 #include "util/u_double_list.h"
 #include "util/u_inlines.h"
 
-#define RADEON_CTX_MAX_PM4	(64 * 1024 / 4)
-
 #define R600_ERR(fmt, args...) \
 	fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args)
 
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
index 54e26b3f0e9..63b3dc3940d 100644
--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -987,12 +987,12 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon)
 	ctx->cs = radeon->ws->cs_create(radeon->ws);
 
 	/* allocate cs variables */
-	ctx->bo = calloc(RADEON_CTX_MAX_PM4, sizeof(void *));
+	ctx->bo = calloc(RADEON_MAX_CMDBUF_DWORDS, sizeof(void *));
 	if (ctx->bo == NULL) {
 		r = -ENOMEM;
 		goto out_err;
 	}
-	ctx->pm4_ndwords = RADEON_CTX_MAX_PM4;
+	ctx->pm4_ndwords = RADEON_MAX_CMDBUF_DWORDS;
 	ctx->pm4 = ctx->cs->buf;
 
 	r600_init_cs(ctx);
@@ -1152,7 +1152,7 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
 
 	/* update the max dword count to make sure we have enough space
 	 * reserved for flushing the destination caches */
-	ctx->pm4_ndwords = RADEON_CTX_MAX_PM4 - ctx->num_dest_buffers * 7 - 16;
+	ctx->pm4_ndwords = RADEON_MAX_CMDBUF_DWORDS - ctx->num_dest_buffers * 7 - 16;
 
 	if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
 		/* need to flush */
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index 577988d37ea..f89e8d6548d 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -911,12 +911,12 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon)
 	ctx->cs = radeon->ws->cs_create(radeon->ws);
 
 	/* allocate cs variables */
-	ctx->bo = calloc(RADEON_CTX_MAX_PM4, sizeof(void *));
+	ctx->bo = calloc(RADEON_MAX_CMDBUF_DWORDS, sizeof(void *));
 	if (ctx->bo == NULL) {
 		r = -ENOMEM;
 		goto out_err;
 	}
-	ctx->pm4_ndwords = RADEON_CTX_MAX_PM4;
+	ctx->pm4_ndwords = RADEON_MAX_CMDBUF_DWORDS;
 	ctx->pm4 = ctx->cs->buf;
 
 	r600_init_cs(ctx);
@@ -1432,7 +1432,7 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
 
 	/* update the max dword count to make sure we have enough space
 	 * reserved for flushing the destination caches */
-	ctx->pm4_ndwords = RADEON_CTX_MAX_PM4 - ctx->num_dest_buffers * 7 - 16;
+	ctx->pm4_ndwords = RADEON_MAX_CMDBUF_DWORDS - ctx->num_dest_buffers * 7 - 16;
 
 	if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
 		/* need to flush */

From c79e9f0ed59d561849a0a4fbaafe87d5064d3e8c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Thu, 4 Aug 2011 07:05:07 +0200
Subject: [PATCH 336/600] r600g: enable thread offloading

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/r600/r600_pipe.c               |  5 ++---
 src/gallium/drivers/r600/r600_pipe.h               |  5 +++++
 src/gallium/drivers/r600/r600_texture.c            |  4 ++--
 src/gallium/winsys/r600/drm/evergreen_hw_context.c |  2 +-
 src/gallium/winsys/r600/drm/r600_drm.c             |  3 ---
 src/gallium/winsys/r600/drm/r600_hw_context.c      | 14 +++++++-------
 6 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 4051584f272..5d09d59e111 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -117,9 +117,8 @@ static struct r600_fence *r600_create_fence(struct r600_pipe_context *ctx)
 }
 
 
-static void r600_flush(struct pipe_context *ctx,
-		       struct pipe_fence_handle **fence,
-		       unsigned flags)
+void r600_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence,
+		unsigned flags)
 {
 	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
 	struct r600_fence **rfence = (struct r600_fence**)fence;
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 3ca003aa244..7ab785ee2df 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -273,6 +273,11 @@ struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen,
 					      struct winsys_handle *whandle);
 void r600_upload_index_buffer(struct r600_pipe_context *rctx, struct r600_drawl *draw);
 
+
+/* r600_pipe.c */
+void r600_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence,
+		unsigned flags);
+
 /* r600_query.c */
 void r600_init_query_functions(struct r600_pipe_context *rctx);
 
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index ed0b2ec2890..7b5a3e74a26 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -66,7 +66,7 @@ static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600
 				  rtransfer->staging_texture,
 				  0, &sbox);
 
-        ctx->flush(ctx, NULL);
+	r600_flush(ctx, NULL, RADEON_FLUSH_ASYNC);
 }
 
 unsigned r600_texture_get_offset(struct r600_resource_texture *rtex,
@@ -645,7 +645,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
 		if (usage & PIPE_TRANSFER_READ) {
 			r600_copy_to_staging_texture(ctx, trans);
 			/* Always referenced in the blit. */
-                        ctx->flush(ctx, NULL);
+			r600_flush(ctx, NULL, 0);
 		}
 		return &trans->transfer;
 	}
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
index 63b3dc3940d..eaf461833c7 100644
--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -1156,7 +1156,7 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
 
 	if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
 		/* need to flush */
-		r600_context_flush(ctx, 0);
+		r600_context_flush(ctx, RADEON_FLUSH_ASYNC);
 	}
 	/* at that point everythings is flushed and ctx->pm4_cdwords = 0 */
 	if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) {
diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c
index 17d098aed96..7d5583fd287 100644
--- a/src/gallium/winsys/r600/drm/r600_drm.c
+++ b/src/gallium/winsys/r600/drm/r600_drm.c
@@ -249,9 +249,6 @@ struct radeon *radeon_create(struct radeon_winsys *ws)
 	if (radeon_drm_get_tiling(radeon))
 		return NULL;
 
-	/* XXX disable ioctl thread offloading until the porting is done. */
-	setenv("RADEON_THREAD", "0", 0);
-
 	return radeon;
 }
 
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index f89e8d6548d..ba8d6c2aa64 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -938,7 +938,7 @@ void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags)
 
 	if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
 		/* need to flush */
-		r600_context_flush(ctx, 0);
+		r600_context_flush(ctx, RADEON_FLUSH_ASYNC);
 	}
 
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, ctx->predicate_drawing);
@@ -1436,7 +1436,7 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
 
 	if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
 		/* need to flush */
-		r600_context_flush(ctx, 0);
+		r600_context_flush(ctx, RADEON_FLUSH_ASYNC);
 	}
 	/* at that point everythings is flushed and ctx->pm4_cdwords = 0 */
 	if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) {
@@ -1549,7 +1549,7 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence_bo,
 
 	if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
 		/* need to flush */
-		r600_context_flush(ctx, 0);
+		r600_context_flush(ctx, RADEON_FLUSH_ASYNC);
 	}
 
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
@@ -1611,7 +1611,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 
 	if ((required_space + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
 		/* need to flush */
-		r600_context_flush(ctx, 0);
+		r600_context_flush(ctx, RADEON_FLUSH_ASYNC);
 	}
 
 	if (query->type == PIPE_QUERY_OCCLUSION_COUNTER) {
@@ -1622,7 +1622,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 			query->queries_emitted = 1;
 		} else {
 			if (++query->queries_emitted > query->buffer_size / query->result_size / 2)
-				r600_context_flush(ctx, 0);
+				r600_context_flush(ctx, RADEON_FLUSH_ASYNC);
 		}
 	}
 
@@ -1714,7 +1714,7 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query,
 {
 	if (operation == PREDICATION_OP_CLEAR) {
 		if (ctx->pm4_cdwords + 3 > ctx->pm4_ndwords)
-			r600_context_flush(ctx, 0);
+			r600_context_flush(ctx, RADEON_FLUSH_ASYNC);
 
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
@@ -1730,7 +1730,7 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query,
 		count /= query->result_size;
 
 		if (ctx->pm4_cdwords + 5 * count > ctx->pm4_ndwords)
-			r600_context_flush(ctx, 0);
+			r600_context_flush(ctx, RADEON_FLUSH_ASYNC);
 
 		op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
 				(flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);

From 296b8990956fcbd7ce47902d7c108a5973db9397 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Sun, 7 Aug 2011 18:42:29 +0200
Subject: [PATCH 337/600] winsys/radeon: remove broken bo-is-busy-for-write
 guessing

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 19 -------------------
 src/gallium/winsys/radeon/drm/radeon_drm_bo.h |  7 -------
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c |  5 -----
 3 files changed, 31 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 609a9065db8..1c8a2b8305e 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -99,8 +99,6 @@ static void radeon_bo_wait(struct pb_buffer *_buf)
     args.handle = bo->handle;
     while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
                                &args, sizeof(args)) == -EBUSY);
-
-    bo->busy_for_write = FALSE;
 }
 
 static boolean radeon_bo_is_busy(struct pb_buffer *_buf)
@@ -117,8 +115,6 @@ static boolean radeon_bo_is_busy(struct pb_buffer *_buf)
     busy = drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
                                &args, sizeof(args)) != 0;
 
-    if (!busy)
-        bo->busy_for_write = FALSE;
     return busy;
 }
 
@@ -196,21 +192,6 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf,
                     /* XXX We could check whether the buffer is busy for write here. */
                     radeon_bo_wait((struct pb_buffer*)bo);
                 }
-#if 0
-                /* XXX This per-winsys busy-for-write tracking sucks.
-                 * What if some other process wrote something, e.g. using
-                 * DRI2CopyRegion? We wouldn't get the busy_for_write flag
-                 * set, skipping bo_wait.
-                 * We need to move the is-busy-for-write query into the kernel.
-                 */
-                } else if (bo->busy_for_write) {
-                    /* Update the busy_for_write field (done by radeon_bo_is_busy)
-                     * and wait if needed. */
-                    if (radeon_bo_is_busy((struct pb_buffer*)bo)) {
-                        radeon_bo_wait((struct pb_buffer*)bo);
-                    }
-                }
-#endif
             } else {
                 /* Mapping for write. */
                 if (radeon_bo_is_referenced_by_cs(cs, bo)) {
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
index f4ea73a2210..047ea6b1cf2 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
@@ -60,13 +60,6 @@ struct radeon_bo {
      * thread, is this bo referenced in? */
     int num_active_ioctls;
 
-    /* Whether the buffer has been relocated for write and is busy since then.
-     * This field is updated in:
-     * - radeon_drm_cs_flush (to TRUE if it's relocated for write)
-     * - radeon_bo_is_busy (to FALSE if it's not busy)
-     * - radeon_bo_wait (to FALSE) */
-    boolean busy_for_write;
-
     boolean flinked;
     uint32_t flink;
 };
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 1ec324e5b74..c309354785a 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -411,11 +411,6 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
         for (i = 0; i < crelocs; i++) {
             /* Update the number of active asynchronous CS ioctls for the buffer. */
             p_atomic_inc(&cs->csc->relocs_bo[i]->num_active_ioctls);
-
-            /* Update whether the buffer is busy for write. */
-            if (cs->csc->relocs[i].write_domain) {
-                cs->csc->relocs_bo[i]->busy_for_write = TRUE;
-            }
         }
 
         if (cs->ws->num_cpus > 1 && debug_get_option_thread() &&

From 1e3c81a068c4ae04cd1c6b18c687d5be69b7b8c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Sun, 7 Aug 2011 19:04:37 +0200
Subject: [PATCH 338/600] winsys/radeon: hook up the new DRM_RADEON_GEM_WAIT
 ioctl

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/r300/r300_screen.c        |  6 +-
 src/gallium/drivers/r300/r300_transfer.c      |  2 +-
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 67 ++++++++++++++-----
 src/gallium/winsys/radeon/drm/radeon_winsys.h | 14 +++-
 4 files changed, 65 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 674bd24953c..13d25ba7dba 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -454,7 +454,7 @@ static boolean r300_fence_signalled(struct pipe_screen *screen,
     struct radeon_winsys *rws = r300_screen(screen)->rws;
     struct pb_buffer *rfence = (struct pb_buffer*)fence;
 
-    return !rws->buffer_is_busy(rfence);
+    return !rws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE);
 }
 
 static boolean r300_fence_finish(struct pipe_screen *screen,
@@ -471,7 +471,7 @@ static boolean r300_fence_finish(struct pipe_screen *screen,
         timeout /= 1000;
 
         /* Wait in a loop. */
-        while (rws->buffer_is_busy(rfence)) {
+        while (rws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE)) {
             if (os_time_get() - start_time >= timeout) {
                 return FALSE;
             }
@@ -480,7 +480,7 @@ static boolean r300_fence_finish(struct pipe_screen *screen,
         return TRUE;
     }
 
-    rws->buffer_wait(rfence);
+    rws->buffer_wait(rfence, RADEON_USAGE_READWRITE);
     return TRUE;
 }
 
diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c
index e2ea4cbf6c5..65964020adc 100644
--- a/src/gallium/drivers/r300/r300_transfer.c
+++ b/src/gallium/drivers/r300/r300_transfer.c
@@ -97,7 +97,7 @@ r300_texture_get_transfer(struct pipe_context *ctx,
         referenced_hw = TRUE;
     } else {
         referenced_hw =
-            r300->rws->buffer_is_busy(tex->buf);
+            r300->rws->buffer_is_busy(tex->buf, RADEON_USAGE_READWRITE);
     }
 
     blittable = desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ||
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 1c8a2b8305e..5c91ec48942 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -43,6 +43,21 @@
 #define RADEON_BO_FLAGS_MICRO_TILE  2
 #define RADEON_BO_FLAGS_MICRO_TILE_SQUARE 0x20
 
+#ifndef DRM_RADEON_GEM_WAIT
+#define DRM_RADEON_GEM_WAIT		0x2b
+
+#define RADEON_GEM_NO_WAIT	0x1
+#define RADEON_GEM_USAGE_READ	0x2
+#define RADEON_GEM_USAGE_WRITE	0x4
+
+struct drm_radeon_gem_wait {
+	uint32_t	handle;
+	uint32_t        flags;  /* one of RADEON_GEM_* */
+};
+
+#endif
+
+
 extern const struct pb_vtbl radeon_bo_vtbl;
 
 
@@ -87,35 +102,49 @@ static struct radeon_bo *get_radeon_bo(struct pb_buffer *_buf)
     return bo;
 }
 
-static void radeon_bo_wait(struct pb_buffer *_buf)
+static void radeon_bo_wait(struct pb_buffer *_buf, enum radeon_bo_usage usage)
 {
     struct radeon_bo *bo = get_radeon_bo(_buf);
-    struct drm_radeon_gem_wait_idle args = {};
 
     while (p_atomic_read(&bo->num_active_ioctls)) {
         sched_yield();
     }
 
-    args.handle = bo->handle;
-    while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
-                               &args, sizeof(args)) == -EBUSY);
+    if (bo->rws->info.drm_minor >= 12) {
+        struct drm_radeon_gem_wait args = {};
+        args.handle = bo->handle;
+        args.flags = usage;
+        while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT,
+                                   &args, sizeof(args)) == -EBUSY);
+    } else {
+        struct drm_radeon_gem_wait_idle args = {};
+        args.handle = bo->handle;
+        while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
+                                   &args, sizeof(args)) == -EBUSY);
+    }
 }
 
-static boolean radeon_bo_is_busy(struct pb_buffer *_buf)
+static boolean radeon_bo_is_busy(struct pb_buffer *_buf,
+                                 enum radeon_bo_usage usage)
 {
     struct radeon_bo *bo = get_radeon_bo(_buf);
-    struct drm_radeon_gem_busy args = {};
-    boolean busy;
 
     if (p_atomic_read(&bo->num_active_ioctls)) {
         return TRUE;
     }
 
-    args.handle = bo->handle;
-    busy = drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
-                               &args, sizeof(args)) != 0;
-
-    return busy;
+    if (bo->rws->info.drm_minor >= 12) {
+        struct drm_radeon_gem_wait args = {};
+        args.handle = bo->handle;
+        args.flags = usage | RADEON_GEM_NO_WAIT;
+        return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT,
+                                   &args, sizeof(args)) != 0;
+    } else {
+        struct drm_radeon_gem_busy args = {};
+        args.handle = bo->handle;
+        return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
+                                   &args, sizeof(args)) != 0;
+    }
 }
 
 static void radeon_bo_destroy(struct pb_buffer *_buf)
@@ -173,7 +202,7 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf,
                 return NULL;
             }
 
-            if (radeon_bo_is_busy((struct pb_buffer*)bo)) {
+            if (radeon_bo_is_busy((struct pb_buffer*)bo, RADEON_USAGE_READWRITE)) {
                 return NULL;
             }
         } else {
@@ -187,10 +216,12 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf,
                  * Only check whether the buffer is being used for write. */
                 if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
                     cs->flush_cs(cs->flush_data, 0);
-                    radeon_bo_wait((struct pb_buffer*)bo);
+                    radeon_bo_wait((struct pb_buffer*)bo,
+                                   RADEON_USAGE_READWRITE);
                 } else {
                     /* XXX We could check whether the buffer is busy for write here. */
-                    radeon_bo_wait((struct pb_buffer*)bo);
+                    radeon_bo_wait((struct pb_buffer*)bo,
+                                   RADEON_USAGE_READWRITE);
                 }
             } else {
                 /* Mapping for write. */
@@ -202,7 +233,7 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf,
                         radeon_drm_cs_sync_flush(cs);
                 }
 
-                radeon_bo_wait((struct pb_buffer*)bo);
+                radeon_bo_wait((struct pb_buffer*)bo, RADEON_USAGE_READWRITE);
             }
         }
     }
@@ -338,7 +369,7 @@ static boolean radeon_bomgr_is_buffer_busy(struct pb_manager *_mgr,
        return TRUE;
    }
 
-   if (radeon_bo_is_busy((struct pb_buffer*)bo)) {
+   if (radeon_bo_is_busy((struct pb_buffer*)bo, RADEON_USAGE_READWRITE)) {
        return TRUE;
    }
 
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index bf5b144fe2c..90583e3ab8c 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -61,6 +61,12 @@ enum radeon_bo_domain { /* bitfield */
     RADEON_DOMAIN_VRAM = 4
 };
 
+enum radeon_bo_usage { /* bitfield */
+    RADEON_USAGE_READ = 2,
+    RADEON_USAGE_WRITE = 4,
+    RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE
+};
+
 struct winsys_handle;
 struct radeon_winsys_cs_handle;   /* for write_reloc etc. */
 
@@ -162,8 +168,10 @@ struct radeon_winsys {
      * Return TRUE if a buffer object is being used by the GPU.
      *
      * \param buf       A winsys buffer object.
+     * \param usage     Only check whether the buffer is busy for the given usage.
      */
-    boolean (*buffer_is_busy)(struct pb_buffer *buf);
+    boolean (*buffer_is_busy)(struct pb_buffer *buf,
+                              enum radeon_bo_usage usage);
 
     /**
      * Wait for a buffer object until it is not used by a GPU. This is
@@ -171,8 +179,10 @@ struct radeon_winsys {
      * and synchronizing to the fence.
      *
      * \param buf       A winsys buffer object to wait for.
+     * \param usage     Only wait until the buffer is idle for the given usage,
+     *                  but may still be busy for some other usage.
      */
-    void (*buffer_wait)(struct pb_buffer *buf);
+    void (*buffer_wait)(struct pb_buffer *buf, enum radeon_bo_usage usage);
 
     /**
      * Return tiling flags describing a memory layout of a buffer object.

From ebfcc58b93cc08c534857c2314694e35b29690ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Sun, 7 Aug 2011 19:18:16 +0200
Subject: [PATCH 339/600] winsys/radeon: take advantage of the new ioctl

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 40 +++++++++++++------
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 5c91ec48942..adfbefd897b 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -197,13 +197,33 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf,
     if (!(flags & PB_USAGE_UNSYNCHRONIZED)) {
         /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
         if (flags & PB_USAGE_DONTBLOCK) {
-            if (radeon_bo_is_referenced_by_cs(cs, bo)) {
-                cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
-                return NULL;
-            }
+            if (!(flags & PB_USAGE_CPU_WRITE)) {
+                /* Mapping for read.
+                 *
+                 * Since we are mapping for read, we don't need to wait
+                 * if the GPU is using the buffer for read too
+                 * (neither one is changing it).
+                 *
+                 * Only check whether the buffer is being used for write. */
+                if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
+                    cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
+                    return NULL;
+                }
 
-            if (radeon_bo_is_busy((struct pb_buffer*)bo, RADEON_USAGE_READWRITE)) {
-                return NULL;
+                if (radeon_bo_is_busy((struct pb_buffer*)bo,
+                                      RADEON_USAGE_WRITE)) {
+                    return NULL;
+                }
+            } else {
+                if (radeon_bo_is_referenced_by_cs(cs, bo)) {
+                    cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
+                    return NULL;
+                }
+
+                if (radeon_bo_is_busy((struct pb_buffer*)bo,
+                                      RADEON_USAGE_READWRITE)) {
+                    return NULL;
+                }
             }
         } else {
             if (!(flags & PB_USAGE_CPU_WRITE)) {
@@ -216,13 +236,9 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf,
                  * Only check whether the buffer is being used for write. */
                 if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
                     cs->flush_cs(cs->flush_data, 0);
-                    radeon_bo_wait((struct pb_buffer*)bo,
-                                   RADEON_USAGE_READWRITE);
-                } else {
-                    /* XXX We could check whether the buffer is busy for write here. */
-                    radeon_bo_wait((struct pb_buffer*)bo,
-                                   RADEON_USAGE_READWRITE);
                 }
+                radeon_bo_wait((struct pb_buffer*)bo,
+                               RADEON_USAGE_WRITE);
             } else {
                 /* Mapping for write. */
                 if (radeon_bo_is_referenced_by_cs(cs, bo)) {

From 47dcfb8dab517e2c92af2f4813b0f5ad200b8b07 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Sun, 7 Aug 2011 21:14:38 +0200
Subject: [PATCH 340/600] r600g: set read/write usage flags for each relocation

This takes advantage of the new GEM_WAIT ioctl when mapping buffers.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/r600/evergreen_state.c    | 560 +++++++++---------
 src/gallium/drivers/r600/r600.h               |  20 +-
 src/gallium/drivers/r600/r600_pipe.h          |   6 +-
 src/gallium/drivers/r600/r600_state.c         | 370 ++++++------
 src/gallium/drivers/r600/r600_state_common.c  |  52 +-
 .../winsys/r600/drm/evergreen_hw_context.c    |   2 +-
 src/gallium/winsys/r600/drm/r600_hw_context.c |  45 +-
 src/gallium/winsys/r600/drm/r600_priv.h       |  10 +-
 8 files changed, 550 insertions(+), 515 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index c9eaf94a2ae..f82e20306d1 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -638,10 +638,10 @@ static void evergreen_set_blend_color(struct pipe_context *ctx,
 		return;
 
 	rstate->id = R600_PIPE_STATE_BLEND_COLOR;
-	r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL, 0);
 
 	free(rctx->states[R600_PIPE_STATE_BLEND_COLOR]);
 	rctx->states[R600_PIPE_STATE_BLEND_COLOR] = rstate;
@@ -686,13 +686,13 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx,
 	blend->cb_target_mask = target_mask;
 	
 	r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
-				color_control, 0xFFFFFFFD, NULL);
+				color_control, 0xFFFFFFFD, NULL, 0);
 
 	if (rctx->chip_class != CAYMAN)
-		r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL, 0);
 	else {
-		r600_pipe_state_add_reg(rstate, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 0xFFFFFFFF, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, 0xFFFFFFFF, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 0xFFFFFFFF, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, 0xFFFFFFFF, 0xFFFFFFFF, NULL, 0);
 	}
 
 	for (int i = 0; i < 8; i++) {
@@ -723,7 +723,7 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx,
 		}
 	}
 	for (int i = 0; i < 8; i++) {
-		r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl[i], 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl[i], 0xFFFFFFFF, NULL, 0);
 	}
 
 	return rstate;
@@ -791,27 +791,27 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
 		S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
 		S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
 	/* TODO db_render_override depends on query */
-	r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028430_DB_STENCILREFMASK, stencil_ref_mask,
-				0xFFFFFFFF & C_028430_STENCILREF, NULL);
+				0xFFFFFFFF & C_028430_STENCILREF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf,
-				0xFFFFFFFF & C_028434_STENCILREF_BF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL);
+				0xFFFFFFFF & C_028434_STENCILREF_BF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL, 0);
 	/* The DB_SHADER_CONTROL mask is 0xFFFFFFBC since Z_EXPORT_ENABLE,
 	 * STENCIL_EXPORT_ENABLE and KILL_ENABLE are controlled by
 	 * evergreen_pipe_shader_ps().*/
-	r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL);
-	r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02800C_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AC8_DB_PRELOAD_CONTROL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02800C_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AC8_DB_PRELOAD_CONTROL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL, 0);
 
 	return rstate;
 }
@@ -856,7 +856,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
 			tmp |= S_0286D4_PNT_SPRITE_TOP_1(1);
 		}
 	}
-	r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL, 0);
 
 	polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL ||
 				state->fill_back != PIPE_POLYGON_MODE_FILL);
@@ -870,44 +870,44 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
 		S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) |
 		S_028814_POLY_MODE(polygon_dual_mode) |
 		S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) |
-		S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL);
+		S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_02881C_PA_CL_VS_OUT_CNTL,
 			S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) |
-			S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
+			S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0);
 	/* point size 12.4 fixed point */
 	tmp = (unsigned)(state->point_size * 8.0);
-	r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL, 0);
 
 	tmp = (unsigned)state->line_width * 8;
-	r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL, 0);
 
 	if (rctx->chip_class == CAYMAN) {
-		r600_pipe_state_add_reg(rstate, CM_R_028BDC_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, CM_R_028BDC_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(rstate, CM_R_028BE4_PA_SU_VTX_CNTL,
 					S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules),
-					0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+					0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
 
 
 	} else {
-		r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL, 0);
 
-		r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
 
 		r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL,
 					S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules),
-					0xFFFFFFFF, NULL);
+					0xFFFFFFFF, NULL, 0);
 	}
-	r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL, 0);
 	return rstate;
 }
 
@@ -933,22 +933,22 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
 			S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) |
 			S_03C000_MAX_ANISO(r600_tex_aniso_filter(state->max_anisotropy)) |
 			S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) |
-			S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL);
+			S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg_noblock(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0,
 			S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
 			S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)),
-			0xFFFFFFFF, NULL);
+			0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg_noblock(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0,
 					S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
 					(state->seamless_cube_map ? 0 : S_03C008_DISABLE_CUBE_WRAP(1)) |
 					S_03C008_TYPE(1),
-					0xFFFFFFFF, NULL);
+					0xFFFFFFFF, NULL, 0);
 
 	if (uc.ui) {
-		r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg_noblock(rstate, R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg_noblock(rstate, R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL, 0);
 	}
 	return rstate;
 }
@@ -1016,6 +1016,8 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
 
 	rstate->bo[0] = bo[0];
 	rstate->bo[1] = bo[1];
+	rstate->bo_usage[0] = RADEON_USAGE_READ;
+	rstate->bo_usage[1] = RADEON_USAGE_READ;
 	rstate->val[0] = (S_030000_DIM(r600_tex_dim(texture->target)) |
 			  S_030000_PITCH((pitch / 8) - 1) |
 			  S_030000_NON_DISP_TILING_ORDER(tile_type) |
@@ -1131,21 +1133,21 @@ static void evergreen_set_clip_state(struct pipe_context *ctx,
 	for (int i = 0; i < state->nr; i++) {
 		r600_pipe_state_add_reg(rstate,
 					R_0285BC_PA_CL_UCP0_X + i * 16,
-					fui(state->ucp[i][0]), 0xFFFFFFFF, NULL);
+					fui(state->ucp[i][0]), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(rstate,
 					R_0285C0_PA_CL_UCP0_Y + i * 16,
-					fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL);
+					fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(rstate,
 					R_0285C4_PA_CL_UCP0_Z + i * 16,
-					fui(state->ucp[i][2]), 0xFFFFFFFF, NULL);
+					fui(state->ucp[i][2]), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(rstate,
 					R_0285C8_PA_CL_UCP0_W + i * 16,
-					fui(state->ucp[i][3]), 0xFFFFFFFF, NULL);
+					fui(state->ucp[i][3]), 0xFFFFFFFF, NULL, 0);
 	}
 	r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL,
 			S_028810_PS_UCP_MODE(3) | ((1 << state->nr) - 1) |
 			S_028810_ZCLIP_NEAR_DISABLE(state->depth_clamp) |
-			S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL);
+			S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL, 0);
 
 	free(rctx->states[R600_PIPE_STATE_CLIP]);
 	rctx->states[R600_PIPE_STATE_CLIP] = rstate;
@@ -1176,28 +1178,28 @@ static void evergreen_set_scissor_state(struct pipe_context *ctx,
 	br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy);
 	r600_pipe_state_add_reg(rstate,
 				R_028210_PA_SC_CLIPRECT_0_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028214_PA_SC_CLIPRECT_0_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028218_PA_SC_CLIPRECT_1_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_02821C_PA_SC_CLIPRECT_1_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028220_PA_SC_CLIPRECT_2_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028224_PA_SC_CLIPRECT_2_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028228_PA_SC_CLIPRECT_3_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_02822C_PA_SC_CLIPRECT_3_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 
 	free(rctx->states[R600_PIPE_STATE_SCISSOR]);
 	rctx->states[R600_PIPE_STATE_SCISSOR] = rstate;
@@ -1219,11 +1221,11 @@ static void evergreen_set_stencil_ref(struct pipe_context *ctx,
 	tmp = S_028430_STENCILREF(state->ref_value[0]);
 	r600_pipe_state_add_reg(rstate,
 				R_028430_DB_STENCILREFMASK, tmp,
-				~C_028430_STENCILREF, NULL);
+				~C_028430_STENCILREF, NULL, 0);
 	tmp = S_028434_STENCILREF_BF(state->ref_value[1]);
 	r600_pipe_state_add_reg(rstate,
 				R_028434_DB_STENCILREFMASK_BF, tmp,
-				~C_028434_STENCILREF_BF, NULL);
+				~C_028434_STENCILREF_BF, NULL, 0);
 
 	free(rctx->states[R600_PIPE_STATE_STENCIL_REF]);
 	rctx->states[R600_PIPE_STATE_STENCIL_REF] = rstate;
@@ -1241,15 +1243,15 @@ static void evergreen_set_viewport_state(struct pipe_context *ctx,
 
 	rctx->viewport = *state;
 	rstate->id = R600_PIPE_STATE_VIEWPORT;
-	r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL, 0);
 
 	free(rctx->states[R600_PIPE_STATE_VIEWPORT]);
 	rctx->states[R600_PIPE_STATE_VIEWPORT] = rstate;
@@ -1354,28 +1356,28 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
 	/* FIXME handle enabling of CB beyond BASE8 which has different offset */
 	r600_pipe_state_add_reg(rstate,
 				R_028C60_CB_COLOR0_BASE + cb * 0x3C,
-				offset >> 8, 0xFFFFFFFF, bo[0]);
+				offset >> 8, 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate,
 				R_028C78_CB_COLOR0_DIM + cb * 0x3C,
-				0x0, 0xFFFFFFFF, NULL);
+				0x0, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028C70_CB_COLOR0_INFO + cb * 0x3C,
-				color_info, 0xFFFFFFFF, bo[0]);
+				color_info, 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate,
 				R_028C64_CB_COLOR0_PITCH + cb * 0x3C,
 				S_028C64_PITCH_TILE_MAX(pitch),
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028C68_CB_COLOR0_SLICE + cb * 0x3C,
 				S_028C68_SLICE_TILE_MAX(slice),
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028C6C_CB_COLOR0_VIEW + cb * 0x3C,
-				0x00000000, 0xFFFFFFFF, NULL);
+				0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C,
 				S_028C74_NON_DISP_TILING_ORDER(tile_type),
-				0xFFFFFFFF, bo[0]);
+				0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE);
 }
 
 static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate,
@@ -1407,33 +1409,33 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
 	stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format);
 
 	r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE,
-				offset >> 8, 0xFFFFFFFF, rbuffer->bo);
+				offset >> 8, 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE,
-				offset >> 8, 0xFFFFFFFF, rbuffer->bo);
+				offset >> 8, 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE);
 
 	if (stencil_format) {
 		uint32_t stencil_offset;
 
 		stencil_offset = ((surf->aligned_height * rtex->pitch_in_bytes[level]) + 255) & ~255;
 		r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE,
-					(offset + stencil_offset) >> 8, 0xFFFFFFFF, rbuffer->bo);
+					(offset + stencil_offset) >> 8, 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE);
 		r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE,
-					(offset + stencil_offset) >> 8, 0xFFFFFFFF, rbuffer->bo);
+					(offset + stencil_offset) >> 8, 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE);
 	}
 
-	r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO,
-				S_028044_FORMAT(stencil_format), 0xFFFFFFFF, rbuffer->bo);
+				S_028044_FORMAT(stencil_format), 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE);
 
 	r600_pipe_state_add_reg(rstate, R_028040_DB_Z_INFO,
 				S_028040_ARRAY_MODE(rtex->array_mode[level]) | S_028040_FORMAT(format),
-				0xFFFFFFFF, rbuffer->bo);
+				0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE,
 				S_028058_PITCH_TILE_MAX(pitch),
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_02805C_DB_DEPTH_SLICE,
 				S_02805C_SLICE_TILE_MAX(slice),
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 }
 
 static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
@@ -1492,49 +1494,49 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
 
 	r600_pipe_state_add_reg(rstate,
 				R_028240_PA_SC_GENERIC_SCISSOR_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028244_PA_SC_GENERIC_SCISSOR_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028254_PA_SC_VPORT_SCISSOR_0_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028030_PA_SC_SCREEN_SCISSOR_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028034_PA_SC_SCREEN_SCISSOR_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028204_PA_SC_WINDOW_SCISSOR_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028208_PA_SC_WINDOW_SCISSOR_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028200_PA_SC_WINDOW_OFFSET, 0x00000000,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028230_PA_SC_EDGERULE, 0xAAAAAAAA,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 
 	r600_pipe_state_add_reg(rstate, R_028238_CB_TARGET_MASK,
-				0x00000000, target_mask, NULL);
+				0x00000000, target_mask, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_02823C_CB_SHADER_MASK,
-				shader_mask, 0xFFFFFFFF, NULL);
+				shader_mask, 0xFFFFFFFF, NULL, 0);
 
 
 	if (rctx->chip_class == CAYMAN) {
 		r600_pipe_state_add_reg(rstate, CM_R_028BE0_PA_SC_AA_CONFIG,
-					0x00000000, 0xFFFFFFFF, NULL);
+					0x00000000, 0xFFFFFFFF, NULL, 0);
 	} else {
 		r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG,
-					0x00000000, 0xFFFFFFFF, NULL);
+					0x00000000, 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX,
-					0x00000000, 0xFFFFFFFF, NULL);
+					0x00000000, 0xFFFFFFFF, NULL, 0);
 	}
 
 	free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]);
@@ -1609,78 +1611,78 @@ static void cayman_init_config(struct r600_pipe_context *rctx)
 
 	tmp = 0x00000000;
 	tmp |= S_008C00_EXPORT_SRC_C(1);
-	r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL, 0);
 
 	/* always set the temp clauses */
-	r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, S_008C04_NUM_CLAUSE_TEMP_GPRS(4), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, S_008C04_NUM_CLAUSE_TEMP_GPRS(4), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210, 0xffffffff, 0);
-	r600_pipe_state_add_reg(rstate, CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98, 0xffffffff, 0);
+	r600_pipe_state_add_reg(rstate, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210, 0xffffffff, NULL, 0);
+	r600_pipe_state_add_reg(rstate, CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98, 0xffffffff, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, CM_R_0288E8_SQ_LDS_ALLOC, 0, 0xffffffff, NULL);
-	r600_pipe_state_add_reg(rstate, R_0288EC_SQ_LDS_ALLOC_PS, 0, 0xffffffff, NULL);
+	r600_pipe_state_add_reg(rstate, CM_R_0288E8_SQ_LDS_ALLOC, 0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0288EC_SQ_LDS_ALLOC_PS, 0, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, CM_R_028804_DB_EQAA, 0x110000, 0xffffffff, NULL);
+	r600_pipe_state_add_reg(rstate, CM_R_028804_DB_EQAA, 0x110000, 0xFFFFFFFF, NULL, 0);
 	r600_context_pipe_state_set(&rctx->ctx, rstate);
 }
 
@@ -1964,39 +1966,39 @@ void evergreen_init_config(struct r600_pipe_context *rctx)
 	tmp |= S_008C00_VS_PRIO(vs_prio);
 	tmp |= S_008C00_GS_PRIO(gs_prio);
 	tmp |= S_008C00_ES_PRIO(es_prio);
-	r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL, 0);
 
 	/* enable dynamic GPR resource management */
 	if (r600_get_minor_version(rctx->radeon) >= 7) {
 		/* always set temp clauses */
 		r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1,
-					S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs), 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL);
+					S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs), 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(rstate, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
 					S_028838_PS_GPRS(0x1e) |
 					S_028838_VS_GPRS(0x1e) |
 					S_028838_GS_GPRS(0x1e) |
 					S_028838_ES_GPRS(0x1e) |
 					S_028838_HS_GPRS(0x1e) |
-					S_028838_LS_GPRS(0x1e), 0xFFFFFFFF, NULL); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
+					S_028838_LS_GPRS(0x1e), 0xFFFFFFFF, NULL, 0); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/
 	} else {
 		tmp = 0;
 		tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
 		tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
 		tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
-		r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0);
 
 		tmp = 0;
 		tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs);
 		tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs);
-		r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0);
 
 		tmp = 0;
 		tmp |= S_008C0C_NUM_HS_GPRS(num_hs_gprs);
 		tmp |= S_008C0C_NUM_HS_GPRS(num_ls_gprs);
-		r600_pipe_state_add_reg(rstate, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL, 0);
 	}
 
 	tmp = 0;
@@ -2004,109 +2006,109 @@ void evergreen_init_config(struct r600_pipe_context *rctx)
 	tmp |= S_008C18_NUM_VS_THREADS(num_vs_threads);
 	tmp |= S_008C18_NUM_GS_THREADS(num_gs_threads);
 	tmp |= S_008C18_NUM_ES_THREADS(num_es_threads);
-	r600_pipe_state_add_reg(rstate, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0);
 
 	tmp = 0;
 	tmp |= S_008C1C_NUM_HS_THREADS(num_hs_threads);
 	tmp |= S_008C1C_NUM_LS_THREADS(num_ls_threads);
-	r600_pipe_state_add_reg(rstate, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0);
 
 	tmp = 0;
 	tmp |= S_008C20_NUM_PS_STACK_ENTRIES(num_ps_stack_entries);
 	tmp |= S_008C20_NUM_VS_STACK_ENTRIES(num_vs_stack_entries);
-	r600_pipe_state_add_reg(rstate, R_008C20_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C20_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0);
 
 	tmp = 0;
 	tmp |= S_008C24_NUM_GS_STACK_ENTRIES(num_gs_stack_entries);
 	tmp |= S_008C24_NUM_ES_STACK_ENTRIES(num_es_stack_entries);
-	r600_pipe_state_add_reg(rstate, R_008C24_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C24_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0);
 
 	tmp = 0;
 	tmp |= S_008C28_NUM_HS_STACK_ENTRIES(num_hs_stack_entries);
 	tmp |= S_008C28_NUM_LS_STACK_ENTRIES(num_ls_stack_entries);
-	r600_pipe_state_add_reg(rstate, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL, 0);
 
 	tmp = 0;
 	tmp |= S_008E2C_NUM_PS_LDS(0x1000);
 	tmp |= S_008E2C_NUM_LS_LDS(0x1000);
-	r600_pipe_state_add_reg(rstate, R_008E2C_SQ_LDS_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008E2C_SQ_LDS_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_009100_SPI_CONFIG_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4), 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_009100_SPI_CONFIG_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_00913C_SPI_CONFIG_CNTL_1, S_00913C_VTX_DONE_DELAY(4), 0xFFFFFFFF, NULL, 0);
 
 #if 0
-	r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x0, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x0, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x0, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x0, 0xFFFFFFFF, NULL, 0);
 #endif
-	r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_028900_SQ_ESGS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028904_SQ_GSVS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028908_SQ_ESTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02890C_SQ_GSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028910_SQ_VSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028914_SQ_PSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028900_SQ_ESGS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028904_SQ_GSVS_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028908_SQ_ESTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02890C_SQ_GSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028910_SQ_VSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028914_SQ_PSTMP_RING_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_02891C_SQ_GS_VERT_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028920_SQ_GS_VERT_ITEMSIZE_1, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028924_SQ_GS_VERT_ITEMSIZE_2, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028928_SQ_GS_VERT_ITEMSIZE_3, 0x0, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_02891C_SQ_GS_VERT_ITEMSIZE, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028920_SQ_GS_VERT_ITEMSIZE_1, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028924_SQ_GS_VERT_ITEMSIZE_2, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028928_SQ_GS_VERT_ITEMSIZE_3, 0x0, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL, 0);
 
 	r600_context_pipe_state_set(&rctx->ctx, rstate);
 }
@@ -2143,19 +2145,19 @@ void evergreen_polygon_offset_update(struct r600_pipe_context *rctx)
 		offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
 		r600_pipe_state_add_reg(&state,
 				R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
-				fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
+				fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&state,
 				R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET,
-				fui(offset_units), 0xFFFFFFFF, NULL);
+				fui(offset_units), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&state,
 				R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
-				fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
+				fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&state,
 				R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET,
-				fui(offset_units), 0xFFFFFFFF, NULL);
+				fui(offset_units), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&state,
 				R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
-				offset_db_fmt_cntl, 0xFFFFFFFF, NULL);
+				offset_db_fmt_cntl, 0xFFFFFFFF, NULL, 0);
 		r600_context_pipe_state_set(&rctx->ctx, &state);
 	}
 }
@@ -2252,32 +2254,32 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
 				  S_0286E0_LINEAR_CENTROID_ENA(have_centroid);
 
 	r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0,
-				spi_ps_in_control_0, 0xFFFFFFFF, NULL);
+				spi_ps_in_control_0, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1,
-				spi_ps_in_control_1, 0xFFFFFFFF, NULL);
+				spi_ps_in_control_1, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_0286E4_SPI_PS_IN_CONTROL_2,
-				0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
+				0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_0286E0_SPI_BARYC_CNTL,
 				spi_baryc_cntl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 
 	r600_pipe_state_add_reg(rstate,
 				R_028840_SQ_PGM_START_PS,
-				0, 0xFFFFFFFF, shader->bo);
+				0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
 	r600_pipe_state_add_reg(rstate,
 				R_028844_SQ_PGM_RESOURCES_PS,
 				S_028844_NUM_GPRS(rshader->bc.ngpr) |
 				S_028844_PRIME_CACHE_ON_DRAW(1) |
 				S_028844_STACK_SIZE(rshader->bc.nstack),
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028848_SQ_PGM_RESOURCES_2_PS,
-				0x0, 0xFFFFFFFF, NULL);
+				0x0, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_02884C_SQ_PGM_EXPORTS_PS,
-				exports_ps, 0xFFFFFFFF, NULL);
+				exports_ps, 0xFFFFFFFF, NULL, 0);
 	/* FIXME: Evergreen doesn't seem to support MULTIWRITE_ENABLE. */
 	/* only set some bits here, the other bits are set in the dsa state */
 	r600_pipe_state_add_reg(rstate,
@@ -2286,10 +2288,10 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
 				S_02880C_Z_EXPORT_ENABLE(1) |
 				S_02880C_STENCIL_EXPORT_ENABLE(1) |
 				S_02880C_KILL_ENABLE(1),
-				NULL);
+				NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_03A200_SQ_LOOP_CONST_0, 0x01000FFF,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 }
 
 void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
@@ -2314,7 +2316,7 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader
 	for (i = 0; i < 10; i++) {
 		r600_pipe_state_add_reg(rstate,
 					R_02861C_SPI_VS_OUT_ID_0 + i * 4,
-					spi_vs_out_id[i], 0xFFFFFFFF, NULL);
+					spi_vs_out_id[i], 0xFFFFFFFF, NULL, 0);
 	}
 
 	/* Certain attributes (position, psize, etc.) don't count as params.
@@ -2328,22 +2330,22 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader
 	r600_pipe_state_add_reg(rstate,
 			R_0286C4_SPI_VS_OUT_CONFIG,
 			S_0286C4_VS_EXPORT_COUNT(nparams - 1),
-			0xFFFFFFFF, NULL);
+			0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 			R_028860_SQ_PGM_RESOURCES_VS,
 			S_028860_NUM_GPRS(rshader->bc.ngpr) |
 			S_028860_STACK_SIZE(rshader->bc.nstack),
-			0xFFFFFFFF, NULL);
+			0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028864_SQ_PGM_RESOURCES_2_VS,
-				0x0, 0xFFFFFFFF, NULL);
+				0x0, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 			R_02885C_SQ_PGM_START_VS,
-			0, 0xFFFFFFFF, shader->bo);
+			0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
 
 	r600_pipe_state_add_reg(rstate,
 				R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 }
 
 void evergreen_fetch_shader(struct pipe_context *ctx,
@@ -2354,10 +2356,10 @@ void evergreen_fetch_shader(struct pipe_context *ctx,
 	rstate->id = R600_PIPE_STATE_FETCH_SHADER;
 	rstate->nregs = 0;
 	r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS,
-				0x00000000, 0xFFFFFFFF, NULL);
+				0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS,
 				0,
-				0xFFFFFFFF, ve->fetch_shader);
+				0xFFFFFFFF, ve->fetch_shader, RADEON_USAGE_READ);
 }
 
 void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx)
@@ -2371,7 +2373,7 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx)
 	r600_pipe_state_add_reg(rstate,
 				R_02880C_DB_SHADER_CONTROL,
 				0x0,
-				S_02880C_DUAL_EXPORT_ENABLE(1), NULL);
+				S_02880C_DUAL_EXPORT_ENABLE(1), NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028000_DB_RENDER_CONTROL,
 				S_028000_DEPTH_COPY_ENABLE(1) |
@@ -2379,7 +2381,7 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx)
 				S_028000_COPY_CENTROID(1),
 				S_028000_DEPTH_COPY_ENABLE(1) |
 				S_028000_STENCIL_COPY_ENABLE(1) |
-				S_028000_COPY_CENTROID(1), NULL);
+				S_028000_COPY_CENTROID(1), NULL, 0);
 	return rstate;
 }
 
@@ -2405,9 +2407,11 @@ void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx,
 
 void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate,
 					struct r600_resource *rbuffer,
-					unsigned offset, unsigned stride)
+					unsigned offset, unsigned stride,
+					enum radeon_bo_usage usage)
 {
 	rstate->bo[0] = rbuffer->bo;
+	rstate->bo_usage[0] = usage;
 	rstate->val[0] = offset;
 	rstate->val[1] = rbuffer->bo_size - offset - 1;
 	rstate->val[2] = S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 232912f914d..f24146edcf1 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -26,8 +26,8 @@
 #ifndef R600_H
 #define R600_H
 
+#include "../../winsys/radeon/drm/radeon_winsys.h"
 #include "util/u_double_list.h"
-#include "util/u_inlines.h"
 
 #define R600_ERR(fmt, args...) \
 	fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args)
@@ -140,6 +140,7 @@ struct r600_pipe_reg {
 	u32				mask;
 	struct r600_block 		*block;
 	struct r600_bo			*bo;
+	enum radeon_bo_usage		bo_usage;
 	u32				id;
 };
 
@@ -152,7 +153,8 @@ struct r600_pipe_state {
 struct r600_pipe_resource_state {
 	unsigned			id;
 	u32                             val[8];
-	struct r600_bo *bo[2];
+	struct r600_bo			*bo[2];
+	enum radeon_bo_usage		bo_usage[2]; /* XXX set these */
 };
 
 #define R600_BLOCK_STATUS_ENABLED	(1 << 0)
@@ -163,6 +165,7 @@ struct r600_pipe_resource_state {
 
 struct r600_block_reloc {
 	struct r600_bo		*bo;
+	enum radeon_bo_usage	bo_usage;
 	unsigned		flush_flags;
 	unsigned		flush_mask;
 	unsigned		bo_pm4_index;
@@ -311,12 +314,15 @@ void _r600_pipe_state_add_reg(struct r600_context *ctx,
 			      struct r600_pipe_state *state,
 			      u32 offset, u32 value, u32 mask,
 			      u32 range_id, u32 block_id,
-			      struct r600_bo *bo);
+			      struct r600_bo *bo,
+			      enum radeon_bo_usage usage);
 
 void r600_pipe_state_add_reg_noblock(struct r600_pipe_state *state,
 				     u32 offset, u32 value, u32 mask,
-				     struct r600_bo *bo);
-#define r600_pipe_state_add_reg(state, offset, value, mask, bo) _r600_pipe_state_add_reg(&rctx->ctx, state, offset, value, mask, CTX_RANGE_ID(offset), CTX_BLOCK_ID(offset), bo)
+				     struct r600_bo *bo,
+				     enum radeon_bo_usage usage);
+
+#define r600_pipe_state_add_reg(state, offset, value, mask, bo, usage) _r600_pipe_state_add_reg(&rctx->ctx, state, offset, value, mask, CTX_RANGE_ID(offset), CTX_BLOCK_ID(offset), bo, usage)
 
 static inline void r600_pipe_state_mod_reg(struct r600_pipe_state *state,
 					   u32 value)
@@ -326,10 +332,12 @@ static inline void r600_pipe_state_mod_reg(struct r600_pipe_state *state,
 }
 
 static inline void r600_pipe_state_mod_reg_bo(struct r600_pipe_state *state,
-					   u32 value, struct r600_bo *bo)
+					      u32 value, struct r600_bo *bo,
+					      enum radeon_bo_usage usage)
 {
 	state->regs[state->nregs].value = value;
 	state->regs[state->nregs].bo = bo;
+	state->regs[state->nregs].bo_usage = usage;
 	state->nregs++;
 }
 
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 7ab785ee2df..2747f54079c 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -250,7 +250,8 @@ void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx,
 					 struct r600_pipe_resource_state *rstate);
 void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate,
 					struct r600_resource *rbuffer,
-					unsigned offset, unsigned stride);
+					unsigned offset, unsigned stride,
+					enum radeon_bo_usage usage);
 boolean evergreen_is_format_supported(struct pipe_screen *screen,
 				      enum pipe_format format,
 				      enum pipe_texture_target target,
@@ -302,7 +303,8 @@ void r600_pipe_init_buffer_resource(struct r600_pipe_context *rctx,
 				    struct r600_pipe_resource_state *rstate);
 void r600_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate,
 				   struct r600_resource *rbuffer,
-				   unsigned offset, unsigned stride);
+				   unsigned offset, unsigned stride,
+				   enum radeon_bo_usage usage);
 void r600_adjust_gprs(struct r600_pipe_context *rctx);
 boolean r600_is_format_supported(struct pipe_screen *screen,
 				 enum pipe_format format,
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 487b1df0052..0757eab2ea7 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -662,19 +662,19 @@ void r600_polygon_offset_update(struct r600_pipe_context *rctx)
 		offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth);
 		r600_pipe_state_add_reg(&state,
 				R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE,
-				fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
+				fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&state,
 				R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET,
-				fui(offset_units), 0xFFFFFFFF, NULL);
+				fui(offset_units), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&state,
 				R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE,
-				fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL);
+				fui(rctx->rasterizer->offset_scale), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&state,
 				R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET,
-				fui(offset_units), 0xFFFFFFFF, NULL);
+				fui(offset_units), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&state,
 				R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
-				offset_db_fmt_cntl, 0xFFFFFFFF, NULL);
+				offset_db_fmt_cntl, 0xFFFFFFFF, NULL, 0);
 		r600_context_pipe_state_set(&rctx->ctx, &state);
 	}
 }
@@ -689,10 +689,10 @@ static void r600_set_blend_color(struct pipe_context *ctx,
 		return;
 
 	rstate->id = R600_PIPE_STATE_BLEND_COLOR;
-	r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028414_CB_BLEND_RED, fui(state->color[0]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028418_CB_BLEND_GREEN, fui(state->color[1]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02841C_CB_BLEND_BLUE, fui(state->color[2]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028420_CB_BLEND_ALPHA, fui(state->color[3]), 0xFFFFFFFF, NULL, 0);
 	free(rctx->states[R600_PIPE_STATE_BLEND_COLOR]);
 	rctx->states[R600_PIPE_STATE_BLEND_COLOR] = rstate;
 	r600_context_pipe_state_set(&rctx->ctx, rstate);
@@ -742,7 +742,7 @@ static void *r600_create_blend_state(struct pipe_context *ctx,
 	blend->cb_target_mask = target_mask;
 	/* MULTIWRITE_ENABLE is controlled by r600_pipe_shader_ps(). */
 	r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
-				color_control, 0xFFFFFFFD, NULL);
+				color_control, 0xFFFFFFFD, NULL, 0);
 
 	for (int i = 0; i < 8; i++) {
 		/* state->rt entries > 0 only written if independent blending */
@@ -773,9 +773,9 @@ static void *r600_create_blend_state(struct pipe_context *ctx,
 
 		/* R600 does not support per-MRT blends */
 		if (rctx->family > CHIP_R600)
-			r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL);
+			r600_pipe_state_add_reg(rstate, R_028780_CB_BLEND0_CONTROL + i * 4, bc, 0xFFFFFFFF, NULL, 0);
 		if (i == 0)
-			r600_pipe_state_add_reg(rstate, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL);
+			r600_pipe_state_add_reg(rstate, R_028804_CB_BLEND_CONTROL, bc, 0xFFFFFFFF, NULL, 0);
 	}
 	return rstate;
 }
@@ -842,28 +842,28 @@ static void *r600_create_dsa_state(struct pipe_context *ctx,
 		S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) |
 		S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE);
 	/* TODO db_render_override depends on query */
-	r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028028_DB_STENCIL_CLEAR, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02802C_DB_DEPTH_CLEAR, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028430_DB_STENCILREFMASK, stencil_ref_mask,
-				0xFFFFFFFF & C_028430_STENCILREF, NULL);
+				0xFFFFFFFF & C_028430_STENCILREF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028434_DB_STENCILREFMASK_BF, stencil_ref_mask_bf,
-				0xFFFFFFFF & C_028434_STENCILREF_BF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0286E0_SPI_FOG_FUNC_SCALE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL);
+				0xFFFFFFFF & C_028434_STENCILREF_BF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0286E0_SPI_FOG_FUNC_SCALE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0286E4_SPI_FOG_FUNC_BIAS, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0286DC_SPI_FOG_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control, 0xFFFFFFFF, NULL, 0);
 	/* The DB_SHADER_CONTROL mask is 0xFFFFFFBC since Z_EXPORT_ENABLE,
 	 * STENCIL_EXPORT_ENABLE and KILL_ENABLE are controlled by
 	 * r600_pipe_shader_ps().*/
-	r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL);
-	r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028D30_DB_PRELOAD_CONTROL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028D44_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, db_shader_control, 0xFFFFFFBC, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028D0C_DB_RENDER_CONTROL, db_render_control, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028D10_DB_RENDER_OVERRIDE, db_render_override, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028D2C_DB_SRESULTS_COMPARE_STATE1, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028D30_DB_PRELOAD_CONTROL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028D44_DB_ALPHA_TO_MASK, 0x0000AA00, 0xFFFFFFFF, NULL, 0);
 
 	return rstate;
 }
@@ -907,7 +907,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
 			tmp |= S_0286D4_PNT_SPRITE_TOP_1(1);
 		}
 	}
-	r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_0286D4_SPI_INTERP_CONTROL_0, tmp, 0xFFFFFFFF, NULL, 0);
 
 	polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL ||
 				state->fill_back != PIPE_POLYGON_MODE_FILL);
@@ -921,33 +921,33 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
 		S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) |
 		S_028814_POLY_MODE(polygon_dual_mode) |
 		S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) |
-		S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL);
+		S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)), 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_02881C_PA_CL_VS_OUT_CNTL,
 			S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) |
-			S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
+			S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028820_PA_CL_NANINF_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0);
 	/* point size 12.4 fixed point */
 	tmp = (unsigned)(state->point_size * 8.0);
-	r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL, 0);
 
 	tmp = (unsigned)state->line_width * 8;
-	r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL, 0);
 
 	r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL,
 				S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules),
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL, 0);
 
 	return rstate;
 }
@@ -977,17 +977,17 @@ static void *r600_create_sampler_state(struct pipe_context *ctx,
 					S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) |
 					S_03C000_MAX_ANISO(r600_tex_aniso_filter(state->max_anisotropy)) |
 					S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) |
-					S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL);
+					S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg_noblock(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0,
 					S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) |
 					S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) |
-					S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg_noblock(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL);
+					S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg_noblock(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL, 0);
 	if (uc.ui) {
-		r600_pipe_state_add_reg_noblock(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg_noblock(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL, 0);
 	}
 	return rstate;
 }
@@ -1068,6 +1068,8 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
 
 	rstate->bo[0] = bo[0];
 	rstate->bo[1] = bo[1];
+	rstate->bo_usage[0] = RADEON_USAGE_READ;
+	rstate->bo_usage[1] = RADEON_USAGE_READ;
 
 	rstate->val[0] = (S_038000_DIM(r600_tex_dim(texture->target)) |
 			  S_038000_TILE_MODE(array_mode) |
@@ -1157,7 +1159,7 @@ static void r600_set_seamless_cubemap(struct r600_pipe_context *rctx, boolean en
 	rstate->id = R600_PIPE_STATE_SEAMLESS_CUBEMAP;
 	r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX,
 				(enable ? 0 : S_009508_DISABLE_CUBE_WRAP(1)),
-				1, NULL);
+				1, NULL, 0);
 
 	free(rctx->states[R600_PIPE_STATE_SEAMLESS_CUBEMAP]);
 	rctx->states[R600_PIPE_STATE_SEAMLESS_CUBEMAP] = rstate;
@@ -1215,21 +1217,21 @@ static void r600_set_clip_state(struct pipe_context *ctx,
 	for (int i = 0; i < state->nr; i++) {
 		r600_pipe_state_add_reg(rstate,
 					R_028E20_PA_CL_UCP0_X + i * 16,
-					fui(state->ucp[i][0]), 0xFFFFFFFF, NULL);
+					fui(state->ucp[i][0]), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(rstate,
 					R_028E24_PA_CL_UCP0_Y + i * 16,
-					fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL);
+					fui(state->ucp[i][1]) , 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(rstate,
 					R_028E28_PA_CL_UCP0_Z + i * 16,
-					fui(state->ucp[i][2]), 0xFFFFFFFF, NULL);
+					fui(state->ucp[i][2]), 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(rstate,
 					R_028E2C_PA_CL_UCP0_W + i * 16,
-					fui(state->ucp[i][3]), 0xFFFFFFFF, NULL);
+					fui(state->ucp[i][3]), 0xFFFFFFFF, NULL, 0);
 	}
 	r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL,
 			S_028810_PS_UCP_MODE(3) | ((1 << state->nr) - 1) |
 			S_028810_ZCLIP_NEAR_DISABLE(state->depth_clamp) |
-			S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL);
+			S_028810_ZCLIP_FAR_DISABLE(state->depth_clamp), 0xFFFFFFFF, NULL, 0);
 
 	free(rctx->states[R600_PIPE_STATE_CLIP]);
 	rctx->states[R600_PIPE_STATE_CLIP] = rstate;
@@ -1260,28 +1262,28 @@ static void r600_set_scissor_state(struct pipe_context *ctx,
 	br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy);
 	r600_pipe_state_add_reg(rstate,
 				R_028210_PA_SC_CLIPRECT_0_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028214_PA_SC_CLIPRECT_0_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028218_PA_SC_CLIPRECT_1_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_02821C_PA_SC_CLIPRECT_1_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028220_PA_SC_CLIPRECT_2_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028224_PA_SC_CLIPRECT_2_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028228_PA_SC_CLIPRECT_3_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_02822C_PA_SC_CLIPRECT_3_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 
 	free(rctx->states[R600_PIPE_STATE_SCISSOR]);
 	rctx->states[R600_PIPE_STATE_SCISSOR] = rstate;
@@ -1303,11 +1305,11 @@ static void r600_set_stencil_ref(struct pipe_context *ctx,
 	tmp = S_028430_STENCILREF(state->ref_value[0]);
 	r600_pipe_state_add_reg(rstate,
 				R_028430_DB_STENCILREFMASK, tmp,
-				~C_028430_STENCILREF, NULL);
+				~C_028430_STENCILREF, NULL, 0);
 	tmp = S_028434_STENCILREF_BF(state->ref_value[1]);
 	r600_pipe_state_add_reg(rstate,
 				R_028434_DB_STENCILREFMASK_BF, tmp,
-				~C_028434_STENCILREF_BF, NULL);
+				~C_028434_STENCILREF_BF, NULL, 0);
 
 	free(rctx->states[R600_PIPE_STATE_STENCIL_REF]);
 	rctx->states[R600_PIPE_STATE_STENCIL_REF] = rstate;
@@ -1325,15 +1327,15 @@ static void r600_set_viewport_state(struct pipe_context *ctx,
 
 	rctx->viewport = *state;
 	rstate->id = R600_PIPE_STATE_VIEWPORT;
-	r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_0282D0_PA_SC_VPORT_ZMIN_0, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0282D4_PA_SC_VPORT_ZMAX_0, 0x3F800000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]), 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028818_PA_CL_VTE_CNTL, 0x0000043F, 0xFFFFFFFF, NULL, 0);
 
 	free(rctx->states[R600_PIPE_STATE_VIEWPORT]);
 	rctx->states[R600_PIPE_STATE_VIEWPORT] = rstate;
@@ -1441,27 +1443,27 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
 
 	r600_pipe_state_add_reg(rstate,
 				R_028040_CB_COLOR0_BASE + cb * 4,
-				offset >> 8, 0xFFFFFFFF, bo[0]);
+				offset >> 8, 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate,
 				R_0280A0_CB_COLOR0_INFO + cb * 4,
-				color_info, 0xFFFFFFFF, bo[0]);
+				color_info, 0xFFFFFFFF, bo[0], RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate,
 				R_028060_CB_COLOR0_SIZE + cb * 4,
 				S_028060_PITCH_TILE_MAX(pitch) |
 				S_028060_SLICE_TILE_MAX(slice),
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028080_CB_COLOR0_VIEW + cb * 4,
-				0x00000000, 0xFFFFFFFF, NULL);
+				0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_0280E0_CB_COLOR0_FRAG + cb * 4,
-				0, 0xFFFFFFFF, bo[1]);
+				0, 0xFFFFFFFF, bo[1], RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate,
 				R_0280C0_CB_COLOR0_TILE + cb * 4,
-				0, 0xFFFFFFFF, bo[2]);
+				0, 0xFFFFFFFF, bo[2], RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate,
 				R_028100_CB_COLOR0_MASK + cb * 4,
-				0x00000000, 0xFFFFFFFF, NULL);
+				0x00000000, 0xFFFFFFFF, NULL, 0);
 }
 
 static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate,
@@ -1492,16 +1494,16 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
 	format = r600_translate_dbformat(state->zsbuf->texture->format);
 
 	r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE,
-				offset >> 8, 0xFFFFFFFF, rbuffer->bo);
+				offset >> 8, 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate, R_028000_DB_DEPTH_SIZE,
 				S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice),
-				0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028010_DB_DEPTH_INFO,
 				S_028010_ARRAY_MODE(rtex->array_mode[level]) | S_028010_FORMAT(format),
-				0xFFFFFFFF, rbuffer->bo);
+				0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT,
-				(surf->aligned_height / 8) - 1, 0xFFFFFFFF, NULL);
+				(surf->aligned_height / 8) - 1, 0xFFFFFFFF, NULL, 0);
 }
 
 static void r600_set_framebuffer_state(struct pipe_context *ctx,
@@ -1546,59 +1548,59 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
 
 	r600_pipe_state_add_reg(rstate,
 				R_028030_PA_SC_SCREEN_SCISSOR_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028034_PA_SC_SCREEN_SCISSOR_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028204_PA_SC_WINDOW_SCISSOR_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028208_PA_SC_WINDOW_SCISSOR_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028240_PA_SC_GENERIC_SCISSOR_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028244_PA_SC_GENERIC_SCISSOR_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028250_PA_SC_VPORT_SCISSOR_0_TL, tl,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028254_PA_SC_VPORT_SCISSOR_0_BR, br,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028200_PA_SC_WINDOW_OFFSET, 0x00000000,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	if (rctx->chip_class >= R700) {
 		r600_pipe_state_add_reg(rstate,
 					R_028230_PA_SC_EDGERULE, 0xAAAAAAAA,
-					0xFFFFFFFF, NULL);
+					0xFFFFFFFF, NULL, 0);
 	}
 
 	r600_pipe_state_add_reg(rstate, R_0287A0_CB_SHADER_CONTROL,
-				shader_control, 0xFFFFFFFF, NULL);
+				shader_control, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028238_CB_TARGET_MASK,
-				0x00000000, target_mask, NULL);
+				0x00000000, target_mask, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_02823C_CB_SHADER_MASK,
-				shader_mask, 0xFFFFFFFF, NULL);
+				shader_mask, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG,
-				0x00000000, 0xFFFFFFFF, NULL);
+				0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX,
-				0x00000000, 0xFFFFFFFF, NULL);
+				0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX,
-				0x00000000, 0xFFFFFFFF, NULL);
+				0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028C30_CB_CLRCMP_CONTROL,
-				0x01000000, 0xFFFFFFFF, NULL);
+				0x01000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028C34_CB_CLRCMP_SRC,
-				0x00000000, 0xFFFFFFFF, NULL);
+				0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028C38_CB_CLRCMP_DST,
-				0x000000FF, 0xFFFFFFFF, NULL);
+				0x000000FF, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028C3C_CB_CLRCMP_MSK,
-				0xFFFFFFFF, 0xFFFFFFFF, NULL);
+				0xFFFFFFFF, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028C48_PA_SC_AA_MASK,
-				0xFFFFFFFF, 0xFFFFFFFF, NULL);
+				0xFFFFFFFF, 0xFFFFFFFF, NULL, 0);
 
 	free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]);
 	rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate;
@@ -1695,7 +1697,7 @@ void r600_adjust_gprs(struct r600_pipe_context *rctx)
 	tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
 	tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
 	rstate.nregs = 0;
-	r600_pipe_state_add_reg(&rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0x0FFFFFFF, NULL);
+	r600_pipe_state_add_reg(&rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0x0FFFFFFF, NULL, 0);
 
 	r600_context_pipe_state_set(&rctx->ctx, &rstate);
 }
@@ -1866,20 +1868,20 @@ void r600_init_config(struct r600_pipe_context *rctx)
 	tmp |= S_008C00_VS_PRIO(vs_prio);
 	tmp |= S_008C00_GS_PRIO(gs_prio);
 	tmp |= S_008C00_ES_PRIO(es_prio);
-	r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL, 0);
 
 	/* SQ_GPR_RESOURCE_MGMT_1 */
 	tmp = 0;
 	tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
 	tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
 	tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs);
-	r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0);
 
 	/* SQ_GPR_RESOURCE_MGMT_2 */
 	tmp = 0;
 	tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs);
 	tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs);
-	r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0);
 
 	/* SQ_THREAD_RESOURCE_MGMT */
 	tmp = 0;
@@ -1887,78 +1889,78 @@ void r600_init_config(struct r600_pipe_context *rctx)
 	tmp |= S_008C0C_NUM_VS_THREADS(num_vs_threads);
 	tmp |= S_008C0C_NUM_GS_THREADS(num_gs_threads);
 	tmp |= S_008C0C_NUM_ES_THREADS(num_es_threads);
-	r600_pipe_state_add_reg(rstate, R_008C0C_SQ_THREAD_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C0C_SQ_THREAD_RESOURCE_MGMT, tmp, 0xFFFFFFFF, NULL, 0);
 
 	/* SQ_STACK_RESOURCE_MGMT_1 */
 	tmp = 0;
 	tmp |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries);
 	tmp |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries);
-	r600_pipe_state_add_reg(rstate, R_008C10_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C10_SQ_STACK_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL, 0);
 
 	/* SQ_STACK_RESOURCE_MGMT_2 */
 	tmp = 0;
 	tmp |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries);
 	tmp |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries);
-	r600_pipe_state_add_reg(rstate, R_008C14_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_008C14_SQ_STACK_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_009714_VC_ENHANCE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x00000000, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_009714_VC_ENHANCE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028350_SX_MISC, 0x00000000, 0xFFFFFFFF, NULL, 0);
 
 	if (rctx->chip_class >= R700) {
-		r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX,
 					S_009508_DISABLE_CUBE_ANISO(1) |
 					S_009508_SYNC_GRADIENT(1) |
 					S_009508_SYNC_WALKER(1) |
-					S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514002, 0xFFFFFFFF, NULL);
+					S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514002, 0xFFFFFFFF, NULL, 0);
 	} else {
-		r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX,
 					S_009508_DISABLE_CUBE_ANISO(1) |
 					S_009508_SYNC_GRADIENT(1) |
 					S_009508_SYNC_WALKER(1) |
-					S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00004012, 0xFFFFFFFF, NULL);
+					S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00004012, 0xFFFFFFFF, NULL, 0);
 	}
-	r600_pipe_state_add_reg(rstate, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0288C0_SQ_FBUF_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0288C4_SQ_REDUC_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0288C8_SQ_GS_VERT_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AB0_VGT_STRMOUT_EN, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000001, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028B20_VGT_STRMOUT_BUFFER_EN, 0x00000000, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0288B0_SQ_ESTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0288B4_SQ_GSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0288B8_SQ_VSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0288BC_SQ_PSTMP_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0288C0_SQ_FBUF_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0288C4_SQ_REDUC_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0288C8_SQ_GS_VERT_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AB0_VGT_STRMOUT_EN, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000001, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028B20_VGT_STRMOUT_BUFFER_EN, 0x00000000, 0xFFFFFFFF, NULL, 0);
 
-	r600_pipe_state_add_reg(rstate, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A84_VGT_PRIMITIVEID_EN, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0x00000000, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0x00000000, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A84_VGT_PRIMITIVEID_EN, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 0x00000000, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_028AA4_VGT_INSTANCE_STEP_RATE_1, 0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_context_pipe_state_set(&rctx->ctx, rstate);
 }
 
@@ -2022,38 +2024,38 @@ void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shad
 			S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
 	}
 
-	r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL, 0);
+	r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028840_SQ_PGM_START_PS,
-				0, 0xFFFFFFFF, shader->bo);
+				0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
 	r600_pipe_state_add_reg(rstate,
 				R_028850_SQ_PGM_RESOURCES_PS,
 				S_028868_NUM_GPRS(rshader->bc.ngpr) |
 				S_028868_STACK_SIZE(rshader->bc.nstack),
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028854_SQ_PGM_EXPORTS_PS,
-				exports_ps, 0xFFFFFFFF, NULL);
+				exports_ps, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_0288CC_SQ_PGM_CF_OFFSET_PS,
-				0x00000000, 0xFFFFFFFF, NULL);
+				0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
 				S_028808_MULTIWRITE_ENABLE(!!rshader->fs_write_all),
 				S_028808_MULTIWRITE_ENABLE(1),
-				NULL);
+				NULL, 0);
 	/* only set some bits here, the other bits are set in the dsa state */
 	r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL,
 				db_shader_control,
 				S_02880C_Z_EXPORT_ENABLE(1) |
 				S_02880C_STENCIL_REF_EXPORT_ENABLE(1) |
 				S_02880C_KILL_ENABLE(1),
-				NULL);
+				NULL, 0);
 
 	r600_pipe_state_add_reg(rstate,
 				R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 }
 
 void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
@@ -2081,7 +2083,7 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad
 	for (i = 0; i < 10; i++) {
 		r600_pipe_state_add_reg(rstate,
 					R_028614_SPI_VS_OUT_ID_0 + i * 4,
-					spi_vs_out_id[i], 0xFFFFFFFF, NULL);
+					spi_vs_out_id[i], 0xFFFFFFFF, NULL, 0);
 	}
 
 	/* Certain attributes (position, psize, etc.) don't count as params.
@@ -2095,22 +2097,22 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad
 	r600_pipe_state_add_reg(rstate,
 			R_0286C4_SPI_VS_OUT_CONFIG,
 			S_0286C4_VS_EXPORT_COUNT(nparams - 1),
-			0xFFFFFFFF, NULL);
+			0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 			R_028868_SQ_PGM_RESOURCES_VS,
 			S_028868_NUM_GPRS(rshader->bc.ngpr) |
 			S_028868_STACK_SIZE(rshader->bc.nstack),
-			0xFFFFFFFF, NULL);
+			0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 			R_0288D0_SQ_PGM_CF_OFFSET_VS,
-			0x00000000, 0xFFFFFFFF, NULL);
+			0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 			R_028858_SQ_PGM_START_VS,
-			0, 0xFFFFFFFF, shader->bo);
+			0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
 
 	r600_pipe_state_add_reg(rstate,
 				R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
-				0xFFFFFFFF, NULL);
+				0xFFFFFFFF, NULL, 0);
 }
 
 void r600_fetch_shader(struct pipe_context *ctx,
@@ -2123,12 +2125,12 @@ void r600_fetch_shader(struct pipe_context *ctx,
 	rstate->id = R600_PIPE_STATE_FETCH_SHADER;
 	rstate->nregs = 0;
 	r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
-				0x00000000, 0xFFFFFFFF, NULL);
+				0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
-				0x00000000, 0xFFFFFFFF, NULL);
+				0x00000000, 0xFFFFFFFF, NULL, 0);
 	r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
 				0,
-				0xFFFFFFFF, ve->fetch_shader);
+				0xFFFFFFFF, ve->fetch_shader, RADEON_USAGE_READ);
 }
 
 void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx)
@@ -2157,7 +2159,7 @@ void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx)
 	r600_pipe_state_add_reg(rstate,
 				R_02880C_DB_SHADER_CONTROL,
 				0x0,
-				S_02880C_DUAL_EXPORT_ENABLE(1), NULL);
+				S_02880C_DUAL_EXPORT_ENABLE(1), NULL, 0);
 	r600_pipe_state_add_reg(rstate,
 				R_028D0C_DB_RENDER_CONTROL,
 				S_028D0C_DEPTH_COPY_ENABLE(1) |
@@ -2165,7 +2167,7 @@ void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx)
 				S_028D0C_COPY_CENTROID(1),
 				S_028D0C_DEPTH_COPY_ENABLE(1) |
 				S_028D0C_STENCIL_COPY_ENABLE(1) |
-				S_028D0C_COPY_CENTROID(1), NULL);
+				S_028D0C_COPY_CENTROID(1), NULL, 0);
 	return rstate;
 }
 
@@ -2186,10 +2188,12 @@ void r600_pipe_init_buffer_resource(struct r600_pipe_context *rctx,
 
 void r600_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate,
 				   struct r600_resource *rbuffer,
-				   unsigned offset, unsigned stride)
+				   unsigned offset, unsigned stride,
+				   enum radeon_bo_usage usage)
 {
 	rstate->val[0] = offset;
 	rstate->bo[0] = rbuffer->bo;
+	rstate->bo_usage[0] = usage;
 	rstate->val[1] = rbuffer->bo_size - offset - 1;
 	rstate->val[2] = S_038008_ENDIAN_SWAP(r600_endian_swap(32)) |
 	                 S_038008_STRIDE(stride);
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 2831517fe86..53a1313a2a8 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -336,7 +336,7 @@ static void r600_update_alpha_ref(struct r600_pipe_context *rctx)
 	rstate.nregs = 0;
 	if (rctx->export_16bpc)
 		alpha_ref &= ~0x1FFF;
-	r600_pipe_state_add_reg(&rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(&rstate, R_028438_SX_ALPHA_REF, alpha_ref, 0xFFFFFFFF, NULL, 0);
 
 	r600_context_pipe_state_set(&rctx->ctx, &rstate);
 	rctx->alpha_ref_dirty = false;
@@ -349,7 +349,7 @@ static void r600_spi_block_init(struct r600_pipe_context *rctx, struct r600_pipe
 	rstate->nregs = 0;
 	rstate->id = R600_PIPE_STATE_SPI;
 	for (i = 0; i < 32; i++) {
-		r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, 0, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, 0, 0xFFFFFFFF, NULL, 0);
 	}
 }
 
@@ -425,10 +425,10 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 		r600_pipe_state_add_reg(&rctx->vs_const_buffer,
 					R_028180_ALU_CONST_BUFFER_SIZE_VS_0,
 					ALIGN_DIVUP(buffer->width0 >> 4, 16),
-					0xFFFFFFFF, NULL);
+					0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&rctx->vs_const_buffer,
 					R_028980_ALU_CONST_CACHE_VS_0,
-					offset >> 8, 0xFFFFFFFF, rbuffer->r.bo);
+					offset >> 8, 0xFFFFFFFF, rbuffer->r.bo, RADEON_USAGE_READ);
 		r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
 
 		rstate = &rctx->vs_const_buffer_resource[index];
@@ -441,10 +441,10 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 		}
 
 		if (rctx->chip_class >= EVERGREEN) {
-			evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16);
+			evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16, RADEON_USAGE_READ);
 			evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
 		} else {
-			r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16);
+			r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16, RADEON_USAGE_READ);
 			r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
 		}
 		break;
@@ -453,10 +453,10 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 		r600_pipe_state_add_reg(&rctx->ps_const_buffer,
 					R_028140_ALU_CONST_BUFFER_SIZE_PS_0,
 					ALIGN_DIVUP(buffer->width0 >> 4, 16),
-					0xFFFFFFFF, NULL);
+					0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&rctx->ps_const_buffer,
 					R_028940_ALU_CONST_CACHE_PS_0,
-					offset >> 8, 0xFFFFFFFF, rbuffer->r.bo);
+					offset >> 8, 0xFFFFFFFF, rbuffer->r.bo, RADEON_USAGE_READ);
 		r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
 
 		rstate = &rctx->ps_const_buffer_resource[index];
@@ -468,10 +468,10 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 			}
 		}
 		if (rctx->chip_class >= EVERGREEN) {
-			evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16);
+			evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16, RADEON_USAGE_READ);
 			evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
 		} else {
-			r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16);
+			r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16, RADEON_USAGE_READ);
 			r600_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
 		}
 		break;
@@ -528,10 +528,10 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx)
 		}
 
 		if (rctx->chip_class >= EVERGREEN) {
-			evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride);
+			evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ);
 			evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
 		} else {
-			r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride);
+			r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ);
 			r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
 		}
 	}
@@ -614,16 +614,16 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	if (rctx->vgt.id != R600_PIPE_STATE_VGT) {
 		rctx->vgt.id = R600_PIPE_STATE_VGT;
 		rctx->vgt.nregs = 0;
-		r600_pipe_state_add_reg(&rctx->vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(&rctx->vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(&rctx->vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(&rctx->vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(&rctx->vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(&rctx->vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL);
-		r600_pipe_state_add_reg(&rctx->vgt, R_03CFF4_SQ_VTX_START_INST_LOC, draw.info.start_instance, 0xFFFFFFFF, NULL);
+		r600_pipe_state_add_reg(&rctx->vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(&rctx->vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(&rctx->vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(&rctx->vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(&rctx->vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(&rctx->vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(&rctx->vgt, R_03CFF4_SQ_VTX_START_INST_LOC, draw.info.start_instance, 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&rctx->vgt, R_028814_PA_SU_SC_MODE_CNTL,
 					0,
-					S_028814_PROVOKING_VTX_LAST(1), NULL);
+					S_028814_PROVOKING_VTX_LAST(1), NULL, 0);
 
 	}
 
@@ -675,11 +675,14 @@ void _r600_pipe_state_add_reg(struct r600_context *ctx,
 			      struct r600_pipe_state *state,
 			      u32 offset, u32 value, u32 mask,
 			      u32 range_id, u32 block_id,
-			      struct r600_bo *bo)
+			      struct r600_bo *bo,
+			      enum radeon_bo_usage usage)
 {
 	struct r600_range *range;
 	struct r600_block *block;
 
+	if (bo) assert(usage);
+
 	range = &ctx->range[range_id];
 	block = range->blocks[block_id];
 	state->regs[state->nregs].block = block;
@@ -688,6 +691,7 @@ void _r600_pipe_state_add_reg(struct r600_context *ctx,
 	state->regs[state->nregs].value = value;
 	state->regs[state->nregs].mask = mask;
 	state->regs[state->nregs].bo = bo;
+	state->regs[state->nregs].bo_usage = usage;
 
 	state->nregs++;
 	assert(state->nregs < R600_BLOCK_MAX_REG);
@@ -695,13 +699,17 @@ void _r600_pipe_state_add_reg(struct r600_context *ctx,
 
 void r600_pipe_state_add_reg_noblock(struct r600_pipe_state *state,
 				     u32 offset, u32 value, u32 mask,
-				     struct r600_bo *bo)
+				     struct r600_bo *bo,
+				     enum radeon_bo_usage usage)
 {
+	if (bo) assert(usage);
+
 	state->regs[state->nregs].id = offset;
 	state->regs[state->nregs].block = NULL;
 	state->regs[state->nregs].value = value;
 	state->regs[state->nregs].mask = mask;
 	state->regs[state->nregs].bo = bo;
+	state->regs[state->nregs].bo_usage = usage;
 
 	state->nregs++;
 	assert(state->nregs < R600_BLOCK_MAX_REG);
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
index eaf461833c7..30bb0b8223c 100644
--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -1186,7 +1186,7 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
 		pm4[7] = draw->vgt_num_indices;
 		pm4[8] = draw->vgt_draw_initiator;
 		pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing);
-		pm4[10] = r600_context_bo_reloc(ctx, draw->indices);
+		pm4[10] = r600_context_bo_reloc(ctx, draw->indices, RADEON_USAGE_READ);
 	} else {
 		pm4[4] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing);
 		pm4[5] = draw->vgt_num_indices;
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index ba8d6c2aa64..6c5b4b8953a 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -84,7 +84,7 @@ void r600_get_backend_mask(struct r600_context *ctx)
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
-		ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, buffer);
+		ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, buffer, RADEON_USAGE_WRITE);
 
 		/* execute */
 		r600_context_flush(ctx, 0);
@@ -986,7 +986,7 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
 		ctx->pm4[ctx->pm4_cdwords++] = 0x00000000;
 		ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A;
 		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing);
-		ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, bo);
+		ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, bo, RADEON_USAGE_WRITE);
 	}
 	bo->last_flush = (bo->last_flush | flush_flags) & flush_mask;
 }
@@ -1067,6 +1067,7 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat
 			/* find relocation */
 			reloc_id = block->pm4_bo_index[id];
 			r600_bo_reference(&block->reloc[reloc_id].bo, reg->bo);
+			block->reloc[reloc_id].bo_usage = reg->bo_usage;
 			/* always force dirty for relocs for now */
 			dirty |= R600_BLOCK_STATUS_DIRTY;
 		}
@@ -1140,11 +1141,14 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_
 			 * we have single case btw VERTEX & TEXTURE resource
 			 */
 			r600_bo_reference(&block->reloc[1].bo, state->bo[0]);
+			block->reloc[1].bo_usage = state->bo_usage[0];
 			r600_bo_reference(&block->reloc[2].bo, NULL);
 		} else {
 			/* TEXTURE RESOURCE */
 			r600_bo_reference(&block->reloc[1].bo, state->bo[0]);
+			block->reloc[1].bo_usage = state->bo_usage[0];
 			r600_bo_reference(&block->reloc[2].bo, state->bo[1]);
+			block->reloc[2].bo_usage = state->bo_usage[1];
 			state->bo[0]->binding |= BO_BOUND_TEXTURE;
 		}
 
@@ -1279,7 +1283,6 @@ struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset)
 
 void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block)
 {
-	int id;
 	int optional = block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS);
 	int cp_dwords = block->pm4_ndwords, start_dword = 0;
 	int new_dwords = 0;
@@ -1295,13 +1298,13 @@ void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *
 		for (int j = 0; j < block->nreg; j++) {
 			if (block->pm4_bo_index[j]) {
 				/* find relocation */
-				id = block->pm4_bo_index[j];
-				block->pm4[block->reloc[id].bo_pm4_index] =
-					r600_context_bo_reloc(ctx, block->reloc[id].bo);
+				struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]];
+				block->pm4[reloc->bo_pm4_index] =
+					r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage);
 				r600_context_bo_flush(ctx,
-						      block->reloc[id].flush_flags,
-						      block->reloc[id].flush_mask,
-						      block->reloc[id].bo);
+						      reloc->flush_flags,
+						      reloc->flush_mask,
+						      reloc->bo);
 				nbo--;
 				if (nbo == 0)
 					break;
@@ -1335,7 +1338,6 @@ out:
 
 void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r600_block *block)
 {
-	int id;
 	int cp_dwords = block->pm4_ndwords;
 	int nbo = block->nbo;
 
@@ -1349,13 +1351,13 @@ void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r60
 	for (int j = 0; j < nbo; j++) {
 		if (block->pm4_bo_index[j]) {
 			/* find relocation */
-			id = block->pm4_bo_index[j];
-			block->pm4[block->reloc[id].bo_pm4_index] =
-				r600_context_bo_reloc(ctx, block->reloc[id].bo);
+			struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]];
+			block->pm4[reloc->bo_pm4_index] =
+				r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage);
 			r600_context_bo_flush(ctx,
-					      block->reloc[id].flush_flags,
-					      block->reloc[id].flush_mask,
-					      block->reloc[id].bo);
+					      reloc->flush_flags,
+					      reloc->flush_mask,
+					      reloc->bo);
 		}
 	}
 	ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH;
@@ -1466,7 +1468,7 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
 		pm4[7] = draw->vgt_num_indices;
 		pm4[8] = draw->vgt_draw_initiator;
 		pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing);
-		pm4[10] = r600_context_bo_reloc(ctx, draw->indices);
+		pm4[10] = r600_context_bo_reloc(ctx, draw->indices, RADEON_USAGE_READ);
 	} else {
 		pm4[4] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing);
 		pm4[5] = draw->vgt_num_indices;
@@ -1561,7 +1563,7 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence_bo,
 	ctx->pm4[ctx->pm4_cdwords++] = value;                   /* DATA_LO */
 	ctx->pm4[ctx->pm4_cdwords++] = 0;                       /* DATA_HI */
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
-	ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, fence_bo);
+	ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, fence_bo, RADEON_USAGE_WRITE);
 }
 
 static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait)
@@ -1672,7 +1674,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 	}
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
-	ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer);
+	ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE);
 
 	query->state |= R600_QUERY_STATE_STARTED;
 	query->state ^= R600_QUERY_STATE_ENDED;
@@ -1696,7 +1698,7 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query)
 		ctx->pm4[ctx->pm4_cdwords++] = 0;
 	}
 	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
-	ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer);
+	ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE);
 
 	query->results_end += query->result_size;
 	if (query->results_end >= query->buffer_size)
@@ -1741,7 +1743,8 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query,
 			ctx->pm4[ctx->pm4_cdwords++] = results_base;
 			ctx->pm4[ctx->pm4_cdwords++] = op;
 			ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
-			ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer);
+			ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer,
+									     RADEON_USAGE_READ);
 			results_base += query->result_size;
 			if (results_base >= query->buffer_size)
 				results_base = 0;
diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h
index c5b82fd43ae..1e901897efd 100644
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -95,11 +95,17 @@ void r600_context_reg(struct r600_context *ctx,
 void r600_init_cs(struct r600_context *ctx);
 int r600_resource_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride, struct r600_reg *reg, int nreg, unsigned offset_base);
 
-static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r600_bo *rbo)
+static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r600_bo *rbo,
+					     enum radeon_bo_usage usage)
 {
+	enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? rbo->domains : 0;
+	enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? rbo->domains : 0;
+
+	assert(usage);
+
 	unsigned reloc_index =
 		ctx->radeon->ws->cs_add_reloc(ctx->cs, rbo->cs_buf,
-					      rbo->domains, rbo->domains);
+					      rd, wd);
 
 	if (reloc_index >= ctx->creloc)
 		ctx->creloc = reloc_index+1;

From a77431b3b0cda9da7dff628f65aaa804c7c96f57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Sun, 14 Aug 2011 21:21:38 +0200
Subject: [PATCH 341/600] r600g: consolidate two files r600d.h

---
 src/gallium/drivers/r600/r600d.h    |   56 +-
 src/gallium/winsys/r600/drm/r600d.h | 2241 ---------------------------
 2 files changed, 55 insertions(+), 2242 deletions(-)
 delete mode 100644 src/gallium/winsys/r600/drm/r600d.h

diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index f6eec24cc05..de458cf398a 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -28,6 +28,32 @@
 
 #define R600_TEXEL_PITCH_ALIGNMENT_MASK        0x7
 
+/* evergreen values */
+#define EG_RESOURCE_OFFSET                 0x00030000
+#define EG_RESOURCE_END                    0x00034000
+#define EG_LOOP_CONST_OFFSET               0x0003A200
+#define EG_LOOP_CONST_END                  0x0003A26C
+#define EG_BOOL_CONST_OFFSET               0x0003A500
+#define EG_BOOL_CONST_END                  0x0003A506
+
+#define R600_CONFIG_REG_OFFSET                 0X00008000
+#define R600_CONFIG_REG_END                    0X0000AC00
+#define R600_CONTEXT_REG_OFFSET                0X00028000
+#define R600_CONTEXT_REG_END                   0X00029000
+#define R600_ALU_CONST_OFFSET                  0X00030000
+#define R600_ALU_CONST_END                     0X00032000
+#define R600_RESOURCE_OFFSET                   0X00038000
+#define R600_RESOURCE_END                      0X0003C000
+#define R600_SAMPLER_OFFSET                    0X0003C000
+#define R600_SAMPLER_END                       0X0003CFF0
+#define R600_CTL_CONST_OFFSET                  0X0003CFF0
+#define R600_CTL_CONST_END                     0X0003E200
+#define R600_LOOP_CONST_OFFSET                 0X0003E200
+#define R600_LOOP_CONST_END                    0X0003E380
+#define R600_BOOL_CONST_OFFSET                 0X0003E380
+#define R600_BOOL_CONST_END                    0X00040000
+
+
 #define PKT3_NOP                               0x10
 #define PKT3_INDIRECT_BUFFER_END               0x17
 #define PKT3_SET_PREDICATION                   0x20
@@ -66,11 +92,38 @@
 #define PKT3_SET_SAMPLER                       0x6E
 #define PKT3_SET_CTL_CONST                     0x6F
 #define PKT3_SURFACE_BASE_UPDATE               0x73
+#define		SURFACE_BASE_UPDATE_DEPTH      (1 << 0)
+#define		SURFACE_BASE_UPDATE_COLOR(x)   (2 << (x))
+#define		SURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x))
+
+#define EVENT_TYPE_PS_PARTIAL_FLUSH            0x10
+#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
+#define EVENT_TYPE_ZPASS_DONE                  0x15
+#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT   0x16
+#define		EVENT_TYPE(x)                           ((x) << 0)
+#define		EVENT_INDEX(x)                          ((x) << 8)
+                /* 0 - any non-TS event
+		 * 1 - ZPASS_DONE
+		 * 2 - SAMPLE_PIPELINESTAT
+		 * 3 - SAMPLE_STREAMOUTSTAT*
+		 * 4 - *S_PARTIAL_FLUSH
+		 * 5 - TS events
+		 */
 
 #define PREDICATION_OP_CLEAR 0x0
 #define PREDICATION_OP_ZPASS 0x1
 #define PREDICATION_OP_PRIMCOUNT 0x2
 
+#define PRED_OP(x) ((x) << 16)
+
+#define PREDICATION_CONTINUE (1 << 31)
+
+#define PREDICATION_HINT_WAIT (0 << 12)
+#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12)
+
+#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8)
+#define PREDICATION_DRAW_VISIBLE (1 << 8)
+
 #define PKT_TYPE_S(x)                   (((x) & 0x3) << 30)
 #define PKT_TYPE_G(x)                   (((x) >> 30) & 0x3)
 #define PKT_TYPE_C                      0x3FFFFFFF
@@ -83,8 +136,9 @@
 #define PKT3_IT_OPCODE_S(x)             (((x) & 0xFF) << 8)
 #define PKT3_IT_OPCODE_G(x)             (((x) >> 8) & 0xFF)
 #define PKT3_IT_OPCODE_C                0xFFFF00FF
+#define PKT3_PRED_S(x)               (((x) >> 0) & 0x1)
 #define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
-#define PKT3(op, count) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count))
+#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PRED_S(predicate))
 
 /* Registers */
 #define R_008C00_SQ_CONFIG                           0x00008C00
diff --git a/src/gallium/winsys/r600/drm/r600d.h b/src/gallium/winsys/r600/drm/r600d.h
deleted file mode 100644
index 4a19dcf8ddf..00000000000
--- a/src/gallium/winsys/r600/drm/r600d.h
+++ /dev/null
@@ -1,2241 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *      Jerome Glisse
- */
-#ifndef R600D_H
-#define R600D_H
-
-/* evergreen values */
-#define EG_RESOURCE_OFFSET                 0x00030000
-#define EG_RESOURCE_END                    0x00034000
-#define EG_LOOP_CONST_OFFSET               0x0003A200
-#define EG_LOOP_CONST_END                  0x0003A26C
-#define EG_BOOL_CONST_OFFSET               0x0003A500
-#define EG_BOOL_CONST_END                  0x0003A506
-
-
-#define R600_CONFIG_REG_OFFSET                 0X00008000
-#define R600_CONFIG_REG_END                    0X0000AC00
-#define R600_CONTEXT_REG_OFFSET                0X00028000
-#define R600_CONTEXT_REG_END                   0X00029000
-#define R600_ALU_CONST_OFFSET                  0X00030000
-#define R600_ALU_CONST_END                     0X00032000
-#define R600_RESOURCE_OFFSET                   0X00038000
-#define R600_RESOURCE_END                      0X0003C000
-#define R600_SAMPLER_OFFSET                    0X0003C000
-#define R600_SAMPLER_END                       0X0003CFF0
-#define R600_CTL_CONST_OFFSET                  0X0003CFF0
-#define R600_CTL_CONST_END                     0X0003E200
-#define R600_LOOP_CONST_OFFSET                 0X0003E200
-#define R600_LOOP_CONST_END                    0X0003E380
-#define R600_BOOL_CONST_OFFSET                 0X0003E380
-#define R600_BOOL_CONST_END                    0X00040000
-
-#define PKT3_NOP                               0x10
-#define PKT3_INDIRECT_BUFFER_END               0x17
-#define PKT3_SET_PREDICATION                   0x20
-#define PKT3_REG_RMW                           0x21
-#define PKT3_COND_EXEC                         0x22
-#define PKT3_PRED_EXEC                         0x23
-#define PKT3_START_3D_CMDBUF                   0x24
-#define PKT3_DRAW_INDEX_2                      0x27
-#define PKT3_CONTEXT_CONTROL                   0x28
-#define PKT3_DRAW_INDEX_IMMD_BE                0x29
-#define PKT3_INDEX_TYPE                        0x2A
-#define PKT3_DRAW_INDEX                        0x2B
-#define PKT3_DRAW_INDEX_AUTO                   0x2D
-#define PKT3_DRAW_INDEX_IMMD                   0x2E
-#define PKT3_NUM_INSTANCES                     0x2F
-#define PKT3_STRMOUT_BUFFER_UPDATE             0x34
-#define PKT3_INDIRECT_BUFFER_MP                0x38
-#define PKT3_MEM_SEMAPHORE                     0x39
-#define PKT3_MPEG_INDEX                        0x3A
-#define PKT3_WAIT_REG_MEM                      0x3C
-#define PKT3_MEM_WRITE                         0x3D
-#define PKT3_INDIRECT_BUFFER                   0x32
-#define PKT3_CP_INTERRUPT                      0x40
-#define PKT3_SURFACE_SYNC                      0x43
-#define PKT3_ME_INITIALIZE                     0x44
-#define PKT3_COND_WRITE                        0x45
-#define PKT3_EVENT_WRITE                       0x46
-#define PKT3_EVENT_WRITE_EOP                   0x47
-#define PKT3_ONE_REG_WRITE                     0x57
-#define PKT3_SET_CONFIG_REG                    0x68
-#define PKT3_SET_CONTEXT_REG                   0x69
-#define PKT3_SET_ALU_CONST                     0x6A
-#define PKT3_SET_BOOL_CONST                    0x6B
-#define PKT3_SET_LOOP_CONST                    0x6C
-#define PKT3_SET_RESOURCE                      0x6D
-#define PKT3_SET_SAMPLER                       0x6E
-#define PKT3_SET_CTL_CONST                     0x6F
-#define PKT3_SURFACE_BASE_UPDATE               0x73
-#define		SURFACE_BASE_UPDATE_DEPTH      (1 << 0)
-#define		SURFACE_BASE_UPDATE_COLOR(x)   (2 << (x))
-#define		SURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x))
-
-#define EVENT_TYPE_PS_PARTIAL_FLUSH            0x10
-#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
-#define EVENT_TYPE_ZPASS_DONE                  0x15
-#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT   0x16
-#define		EVENT_TYPE(x)                           ((x) << 0)
-#define		EVENT_INDEX(x)                          ((x) << 8)
-                /* 0 - any non-TS event
-		 * 1 - ZPASS_DONE
-		 * 2 - SAMPLE_PIPELINESTAT
-		 * 3 - SAMPLE_STREAMOUTSTAT*
-		 * 4 - *S_PARTIAL_FLUSH
-		 * 5 - TS events
-		 */
-
-#define PREDICATION_OP_CLEAR 0x0
-#define PREDICATION_OP_ZPASS 0x1
-#define PREDICATION_OP_PRIMCOUNT 0x2
-
-#define PRED_OP(x) ((x) << 16)
-
-#define PREDICATION_CONTINUE (1 << 31)
-
-#define PREDICATION_HINT_WAIT (0 << 12)
-#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12)
-
-#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8)
-#define PREDICATION_DRAW_VISIBLE (1 << 8)
-
-#define PKT_TYPE_S(x)                   (((x) & 0x3) << 30)
-#define PKT_TYPE_G(x)                   (((x) >> 30) & 0x3)
-#define PKT_TYPE_C                      0x3FFFFFFF
-#define PKT_COUNT_S(x)                  (((x) & 0x3FFF) << 16)
-#define PKT_COUNT_G(x)                  (((x) >> 16) & 0x3FFF)
-#define PKT_COUNT_C                     0xC000FFFF
-#define PKT0_BASE_INDEX_S(x)            (((x) & 0xFFFF) << 0)
-#define PKT0_BASE_INDEX_G(x)            (((x) >> 0) & 0xFFFF)
-#define PKT0_BASE_INDEX_C               0xFFFF0000
-#define PKT3_IT_OPCODE_S(x)             (((x) & 0xFF) << 8)
-#define PKT3_IT_OPCODE_G(x)             (((x) >> 8) & 0xFF)
-#define PKT3_IT_OPCODE_C                0xFFFF00FF
-#define PKT3_PRED_S(x)               (((x) >> 0) & 0x1)
-#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
-#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PRED_S(predicate))
-
-/* Registers */
-#define R_0280A0_CB_COLOR0_INFO                      0x0280A0
-#define   S_0280A0_ENDIAN(x)                           (((x) & 0x3) << 0)
-#define   G_0280A0_ENDIAN(x)                           (((x) >> 0) & 0x3)
-#define   C_0280A0_ENDIAN                              0xFFFFFFFC
-#define   S_0280A0_FORMAT(x)                           (((x) & 0x3F) << 2)
-#define   G_0280A0_FORMAT(x)                           (((x) >> 2) & 0x3F)
-#define   C_0280A0_FORMAT                              0xFFFFFF03
-#define     V_0280A0_COLOR_INVALID                     0x00000000
-#define     V_0280A0_COLOR_8                           0x00000001
-#define     V_0280A0_COLOR_4_4                         0x00000002
-#define     V_0280A0_COLOR_3_3_2                       0x00000003
-#define     V_0280A0_COLOR_16                          0x00000005
-#define     V_0280A0_COLOR_16_FLOAT                    0x00000006
-#define     V_0280A0_COLOR_8_8                         0x00000007
-#define     V_0280A0_COLOR_5_6_5                       0x00000008
-#define     V_0280A0_COLOR_6_5_5                       0x00000009
-#define     V_0280A0_COLOR_1_5_5_5                     0x0000000A
-#define     V_0280A0_COLOR_4_4_4_4                     0x0000000B
-#define     V_0280A0_COLOR_5_5_5_1                     0x0000000C
-#define     V_0280A0_COLOR_32                          0x0000000D
-#define     V_0280A0_COLOR_32_FLOAT                    0x0000000E
-#define     V_0280A0_COLOR_16_16                       0x0000000F
-#define     V_0280A0_COLOR_16_16_FLOAT                 0x00000010
-#define     V_0280A0_COLOR_8_24                        0x00000011
-#define     V_0280A0_COLOR_8_24_FLOAT                  0x00000012
-#define     V_0280A0_COLOR_24_8                        0x00000013
-#define     V_0280A0_COLOR_24_8_FLOAT                  0x00000014
-#define     V_0280A0_COLOR_10_11_11                    0x00000015
-#define     V_0280A0_COLOR_10_11_11_FLOAT              0x00000016
-#define     V_0280A0_COLOR_11_11_10                    0x00000017
-#define     V_0280A0_COLOR_11_11_10_FLOAT              0x00000018
-#define     V_0280A0_COLOR_2_10_10_10                  0x00000019
-#define     V_0280A0_COLOR_8_8_8_8                     0x0000001A
-#define     V_0280A0_COLOR_10_10_10_2                  0x0000001B
-#define     V_0280A0_COLOR_X24_8_32_FLOAT              0x0000001C
-#define     V_0280A0_COLOR_32_32                       0x0000001D
-#define     V_0280A0_COLOR_32_32_FLOAT                 0x0000001E
-#define     V_0280A0_COLOR_16_16_16_16                 0x0000001F
-#define     V_0280A0_COLOR_16_16_16_16_FLOAT           0x00000020
-#define     V_0280A0_COLOR_32_32_32_32                 0x00000022
-#define     V_0280A0_COLOR_32_32_32_32_FLOAT           0x00000023
-#define   S_0280A0_ARRAY_MODE(x)                       (((x) & 0xF) << 8)
-#define   G_0280A0_ARRAY_MODE(x)                       (((x) >> 8) & 0xF)
-#define   C_0280A0_ARRAY_MODE                          0xFFFFF0FF
-#define     V_0280A0_ARRAY_LINEAR_GENERAL              0x00000000
-#define     V_0280A0_ARRAY_LINEAR_ALIGNED              0x00000001
-#define     V_0280A0_ARRAY_1D_TILED_THIN1              0x00000002
-#define     V_0280A0_ARRAY_2D_TILED_THIN1              0x00000004
-#define   S_0280A0_NUMBER_TYPE(x)                      (((x) & 0x7) << 12)
-#define   G_0280A0_NUMBER_TYPE(x)                      (((x) >> 12) & 0x7)
-#define   C_0280A0_NUMBER_TYPE                         0xFFFF8FFF
-#define   S_0280A0_READ_SIZE(x)                        (((x) & 0x1) << 15)
-#define   G_0280A0_READ_SIZE(x)                        (((x) >> 15) & 0x1)
-#define   C_0280A0_READ_SIZE                           0xFFFF7FFF
-#define   S_0280A0_COMP_SWAP(x)                        (((x) & 0x3) << 16)
-#define   G_0280A0_COMP_SWAP(x)                        (((x) >> 16) & 0x3)
-#define   C_0280A0_COMP_SWAP                           0xFFFCFFFF
-#define   S_0280A0_TILE_MODE(x)                        (((x) & 0x3) << 18)
-#define   G_0280A0_TILE_MODE(x)                        (((x) >> 18) & 0x3)
-#define   C_0280A0_TILE_MODE                           0xFFF3FFFF
-#define   S_0280A0_BLEND_CLAMP(x)                      (((x) & 0x1) << 20)
-#define   G_0280A0_BLEND_CLAMP(x)                      (((x) >> 20) & 0x1)
-#define   C_0280A0_BLEND_CLAMP                         0xFFEFFFFF
-#define   S_0280A0_CLEAR_COLOR(x)                      (((x) & 0x1) << 21)
-#define   G_0280A0_CLEAR_COLOR(x)                      (((x) >> 21) & 0x1)
-#define   C_0280A0_CLEAR_COLOR                         0xFFDFFFFF
-#define   S_0280A0_BLEND_BYPASS(x)                     (((x) & 0x1) << 22)
-#define   G_0280A0_BLEND_BYPASS(x)                     (((x) >> 22) & 0x1)
-#define   C_0280A0_BLEND_BYPASS                        0xFFBFFFFF
-#define   S_0280A0_BLEND_FLOAT32(x)                    (((x) & 0x1) << 23)
-#define   G_0280A0_BLEND_FLOAT32(x)                    (((x) >> 23) & 0x1)
-#define   C_0280A0_BLEND_FLOAT32                       0xFF7FFFFF
-#define   S_0280A0_SIMPLE_FLOAT(x)                     (((x) & 0x1) << 24)
-#define   G_0280A0_SIMPLE_FLOAT(x)                     (((x) >> 24) & 0x1)
-#define   C_0280A0_SIMPLE_FLOAT                        0xFEFFFFFF
-#define   S_0280A0_ROUND_MODE(x)                       (((x) & 0x1) << 25)
-#define   G_0280A0_ROUND_MODE(x)                       (((x) >> 25) & 0x1)
-#define   C_0280A0_ROUND_MODE                          0xFDFFFFFF
-#define   S_0280A0_TILE_COMPACT(x)                     (((x) & 0x1) << 26)
-#define   G_0280A0_TILE_COMPACT(x)                     (((x) >> 26) & 0x1)
-#define   C_0280A0_TILE_COMPACT                        0xFBFFFFFF
-#define   S_0280A0_SOURCE_FORMAT(x)                    (((x) & 0x1) << 27)
-#define   G_0280A0_SOURCE_FORMAT(x)                    (((x) >> 27) & 0x1)
-#define   C_0280A0_SOURCE_FORMAT                       0xF7FFFFFF
-#define R_028060_CB_COLOR0_SIZE                      0x028060
-#define   S_028060_PITCH_TILE_MAX(x)                   (((x) & 0x3FF) << 0)
-#define   G_028060_PITCH_TILE_MAX(x)                   (((x) >> 0) & 0x3FF)
-#define   C_028060_PITCH_TILE_MAX                      0xFFFFFC00
-#define   S_028060_SLICE_TILE_MAX(x)                   (((x) & 0xFFFFF) << 10)
-#define   G_028060_SLICE_TILE_MAX(x)                   (((x) >> 10) & 0xFFFFF)
-#define   C_028060_SLICE_TILE_MAX                      0xC00003FF
-#define R_028800_DB_DEPTH_CONTROL                    0x028800
-#define   S_028800_STENCIL_ENABLE(x)                   (((x) & 0x1) << 0)
-#define   G_028800_STENCIL_ENABLE(x)                   (((x) >> 0) & 0x1)
-#define   C_028800_STENCIL_ENABLE                      0xFFFFFFFE
-#define   S_028800_Z_ENABLE(x)                         (((x) & 0x1) << 1)
-#define   G_028800_Z_ENABLE(x)                         (((x) >> 1) & 0x1)
-#define   C_028800_Z_ENABLE                            0xFFFFFFFD
-#define   S_028800_Z_WRITE_ENABLE(x)                   (((x) & 0x1) << 2)
-#define   G_028800_Z_WRITE_ENABLE(x)                   (((x) >> 2) & 0x1)
-#define   C_028800_Z_WRITE_ENABLE                      0xFFFFFFFB
-#define   S_028800_ZFUNC(x)                            (((x) & 0x7) << 4)
-#define   G_028800_ZFUNC(x)                            (((x) >> 4) & 0x7)
-#define   C_028800_ZFUNC                               0xFFFFFF8F
-#define   S_028800_BACKFACE_ENABLE(x)                  (((x) & 0x1) << 7)
-#define   G_028800_BACKFACE_ENABLE(x)                  (((x) >> 7) & 0x1)
-#define   C_028800_BACKFACE_ENABLE                     0xFFFFFF7F
-#define   S_028800_STENCILFUNC(x)                      (((x) & 0x7) << 8)
-#define   G_028800_STENCILFUNC(x)                      (((x) >> 8) & 0x7)
-#define   C_028800_STENCILFUNC                         0xFFFFF8FF
-#define   S_028800_STENCILFAIL(x)                      (((x) & 0x7) << 11)
-#define   G_028800_STENCILFAIL(x)                      (((x) >> 11) & 0x7)
-#define   C_028800_STENCILFAIL                         0xFFFFC7FF
-#define   S_028800_STENCILZPASS(x)                     (((x) & 0x7) << 14)
-#define   G_028800_STENCILZPASS(x)                     (((x) >> 14) & 0x7)
-#define   C_028800_STENCILZPASS                        0xFFFE3FFF
-#define   S_028800_STENCILZFAIL(x)                     (((x) & 0x7) << 17)
-#define   G_028800_STENCILZFAIL(x)                     (((x) >> 17) & 0x7)
-#define   C_028800_STENCILZFAIL                        0xFFF1FFFF
-#define   S_028800_STENCILFUNC_BF(x)                   (((x) & 0x7) << 20)
-#define   G_028800_STENCILFUNC_BF(x)                   (((x) >> 20) & 0x7)
-#define   C_028800_STENCILFUNC_BF                      0xFF8FFFFF
-#define   S_028800_STENCILFAIL_BF(x)                   (((x) & 0x7) << 23)
-#define   G_028800_STENCILFAIL_BF(x)                   (((x) >> 23) & 0x7)
-#define   C_028800_STENCILFAIL_BF                      0xFC7FFFFF
-#define   S_028800_STENCILZPASS_BF(x)                  (((x) & 0x7) << 26)
-#define   G_028800_STENCILZPASS_BF(x)                  (((x) >> 26) & 0x7)
-#define   C_028800_STENCILZPASS_BF                     0xE3FFFFFF
-#define   S_028800_STENCILZFAIL_BF(x)                  (((x) & 0x7) << 29)
-#define   G_028800_STENCILZFAIL_BF(x)                  (((x) >> 29) & 0x7)
-#define   C_028800_STENCILZFAIL_BF                     0x1FFFFFFF
-#define R_028010_DB_DEPTH_INFO                       0x028010
-#define   S_028010_FORMAT(x)                           (((x) & 0x7) << 0)
-#define   G_028010_FORMAT(x)                           (((x) >> 0) & 0x7)
-#define   C_028010_FORMAT                              0xFFFFFFF8
-#define     V_028010_DEPTH_INVALID                     0x00000000
-#define     V_028010_DEPTH_16                          0x00000001
-#define     V_028010_DEPTH_X8_24                       0x00000002
-#define     V_028010_DEPTH_8_24                        0x00000003
-#define     V_028010_DEPTH_X8_24_FLOAT                 0x00000004
-#define     V_028010_DEPTH_8_24_FLOAT                  0x00000005
-#define     V_028010_DEPTH_32_FLOAT                    0x00000006
-#define     V_028010_DEPTH_X24_8_32_FLOAT              0x00000007
-#define   S_028010_READ_SIZE(x)                        (((x) & 0x1) << 3)
-#define   G_028010_READ_SIZE(x)                        (((x) >> 3) & 0x1)
-#define   C_028010_READ_SIZE                           0xFFFFFFF7
-#define   S_028010_ARRAY_MODE(x)                       (((x) & 0xF) << 15)
-#define   G_028010_ARRAY_MODE(x)                       (((x) >> 15) & 0xF)
-#define   C_028010_ARRAY_MODE                          0xFFF87FFF
-#define   S_028010_TILE_SURFACE_ENABLE(x)              (((x) & 0x1) << 25)
-#define   G_028010_TILE_SURFACE_ENABLE(x)              (((x) >> 25) & 0x1)
-#define   C_028010_TILE_SURFACE_ENABLE                 0xFDFFFFFF
-#define   S_028010_TILE_COMPACT(x)                     (((x) & 0x1) << 26)
-#define   G_028010_TILE_COMPACT(x)                     (((x) >> 26) & 0x1)
-#define   C_028010_TILE_COMPACT                        0xFBFFFFFF
-#define   S_028010_ZRANGE_PRECISION(x)                 (((x) & 0x1) << 31)
-#define   G_028010_ZRANGE_PRECISION(x)                 (((x) >> 31) & 0x1)
-#define   C_028010_ZRANGE_PRECISION                    0x7FFFFFFF
-#define R_028000_DB_DEPTH_SIZE                       0x028000
-#define   S_028000_PITCH_TILE_MAX(x)                   (((x) & 0x3FF) << 0)
-#define   G_028000_PITCH_TILE_MAX(x)                   (((x) >> 0) & 0x3FF)
-#define   C_028000_PITCH_TILE_MAX                      0xFFFFFC00
-#define   S_028000_SLICE_TILE_MAX(x)                   (((x) & 0xFFFFF) << 10)
-#define   G_028000_SLICE_TILE_MAX(x)                   (((x) >> 10) & 0xFFFFF)
-#define   C_028000_SLICE_TILE_MAX                      0xC00003FF
-#define R_028004_DB_DEPTH_VIEW                       0x028004
-#define   S_028004_SLICE_START(x)                      (((x) & 0x7FF) << 0)
-#define   G_028004_SLICE_START(x)                      (((x) >> 0) & 0x7FF)
-#define   C_028004_SLICE_START                         0xFFFFF800
-#define   S_028004_SLICE_MAX(x)                        (((x) & 0x7FF) << 13)
-#define   G_028004_SLICE_MAX(x)                        (((x) >> 13) & 0x7FF)
-#define   C_028004_SLICE_MAX                           0xFF001FFF
-#define R_028D24_DB_HTILE_SURFACE                    0x028D24
-#define   S_028D24_HTILE_WIDTH(x)                      (((x) & 0x1) << 0)
-#define   G_028D24_HTILE_WIDTH(x)                      (((x) >> 0) & 0x1)
-#define   C_028D24_HTILE_WIDTH                         0xFFFFFFFE
-#define   S_028D24_HTILE_HEIGHT(x)                     (((x) & 0x1) << 1)
-#define   G_028D24_HTILE_HEIGHT(x)                     (((x) >> 1) & 0x1)
-#define   C_028D24_HTILE_HEIGHT                        0xFFFFFFFD
-#define   S_028D24_LINEAR(x)                           (((x) & 0x1) << 2)
-#define   G_028D24_LINEAR(x)                           (((x) >> 2) & 0x1)
-#define   C_028D24_LINEAR                              0xFFFFFFFB
-#define   S_028D24_FULL_CACHE(x)                       (((x) & 0x1) << 3)
-#define   G_028D24_FULL_CACHE(x)                       (((x) >> 3) & 0x1)
-#define   C_028D24_FULL_CACHE                          0xFFFFFFF7
-#define   S_028D24_HTILE_USES_PRELOAD_WIN(x)           (((x) & 0x1) << 4)
-#define   G_028D24_HTILE_USES_PRELOAD_WIN(x)           (((x) >> 4) & 0x1)
-#define   C_028D24_HTILE_USES_PRELOAD_WIN              0xFFFFFFEF
-#define   S_028D24_PRELOAD(x)                          (((x) & 0x1) << 5)
-#define   G_028D24_PRELOAD(x)                          (((x) >> 5) & 0x1)
-#define   C_028D24_PRELOAD                             0xFFFFFFDF
-#define   S_028D24_PREFETCH_WIDTH(x)                   (((x) & 0x3F) << 6)
-#define   G_028D24_PREFETCH_WIDTH(x)                   (((x) >> 6) & 0x3F)
-#define   C_028D24_PREFETCH_WIDTH                      0xFFFFF03F
-#define   S_028D24_PREFETCH_HEIGHT(x)                  (((x) & 0x3F) << 12)
-#define   G_028D24_PREFETCH_HEIGHT(x)                  (((x) >> 12) & 0x3F)
-#define   C_028D24_PREFETCH_HEIGHT                     0xFFFC0FFF
-#define R_028D34_DB_PREFETCH_LIMIT                   0x028D34
-#define   S_028D34_DEPTH_HEIGHT_TILE_MAX(x)            (((x) & 0x3FF) << 0)
-#define   G_028D34_DEPTH_HEIGHT_TILE_MAX(x)            (((x) >> 0) & 0x3FF)
-#define   C_028D34_DEPTH_HEIGHT_TILE_MAX               0xFFFFFC00
-#define R_028D10_DB_RENDER_OVERRIDE                  0x028D10
-#define   S_028D10_FORCE_HIZ_ENABLE(x)                 (((x) & 0x3) << 0)
-#define   G_028D10_FORCE_HIZ_ENABLE(x)                 (((x) >> 0) & 0x3)
-#define   C_028D10_FORCE_HIZ_ENABLE                    0xFFFFFFFC
-#define   S_028D10_FORCE_HIS_ENABLE0(x)                (((x) & 0x3) << 2)
-#define   G_028D10_FORCE_HIS_ENABLE0(x)                (((x) >> 2) & 0x3)
-#define   C_028D10_FORCE_HIS_ENABLE0                   0xFFFFFFF3
-#define   S_028D10_FORCE_HIS_ENABLE1(x)                (((x) & 0x3) << 4)
-#define   G_028D10_FORCE_HIS_ENABLE1(x)                (((x) >> 4) & 0x3)
-#define   C_028D10_FORCE_HIS_ENABLE1                   0xFFFFFFCF
-#define   S_028D10_FORCE_SHADER_Z_ORDER(x)             (((x) & 0x1) << 6)
-#define   G_028D10_FORCE_SHADER_Z_ORDER(x)             (((x) >> 6) & 0x1)
-#define   C_028D10_FORCE_SHADER_Z_ORDER                0xFFFFFFBF
-#define   S_028D10_FAST_Z_DISABLE(x)                   (((x) & 0x1) << 7)
-#define   G_028D10_FAST_Z_DISABLE(x)                   (((x) >> 7) & 0x1)
-#define   C_028D10_FAST_Z_DISABLE                      0xFFFFFF7F
-#define   S_028D10_FAST_STENCIL_DISABLE(x)             (((x) & 0x1) << 8)
-#define   G_028D10_FAST_STENCIL_DISABLE(x)             (((x) >> 8) & 0x1)
-#define   C_028D10_FAST_STENCIL_DISABLE                0xFFFFFEFF
-#define   S_028D10_NOOP_CULL_DISABLE(x)                (((x) & 0x1) << 9)
-#define   G_028D10_NOOP_CULL_DISABLE(x)                (((x) >> 9) & 0x1)
-#define   C_028D10_NOOP_CULL_DISABLE                   0xFFFFFDFF
-#define   S_028D10_FORCE_COLOR_KILL(x)                 (((x) & 0x1) << 10)
-#define   G_028D10_FORCE_COLOR_KILL(x)                 (((x) >> 10) & 0x1)
-#define   C_028D10_FORCE_COLOR_KILL                    0xFFFFFBFF
-#define   S_028D10_FORCE_Z_READ(x)                     (((x) & 0x1) << 11)
-#define   G_028D10_FORCE_Z_READ(x)                     (((x) >> 11) & 0x1)
-#define   C_028D10_FORCE_Z_READ                        0xFFFFF7FF
-#define   S_028D10_FORCE_STENCIL_READ(x)               (((x) & 0x1) << 12)
-#define   G_028D10_FORCE_STENCIL_READ(x)               (((x) >> 12) & 0x1)
-#define   C_028D10_FORCE_STENCIL_READ                  0xFFFFEFFF
-#define   S_028D10_FORCE_FULL_Z_RANGE(x)               (((x) & 0x3) << 13)
-#define   G_028D10_FORCE_FULL_Z_RANGE(x)               (((x) >> 13) & 0x3)
-#define   C_028D10_FORCE_FULL_Z_RANGE                  0xFFFF9FFF
-#define   S_028D10_FORCE_QC_SMASK_CONFLICT(x)          (((x) & 0x1) << 15)
-#define   G_028D10_FORCE_QC_SMASK_CONFLICT(x)          (((x) >> 15) & 0x1)
-#define   C_028D10_FORCE_QC_SMASK_CONFLICT             0xFFFF7FFF
-#define   S_028D10_DISABLE_VIEWPORT_CLAMP(x)           (((x) & 0x1) << 16)
-#define   G_028D10_DISABLE_VIEWPORT_CLAMP(x)           (((x) >> 16) & 0x1)
-#define   C_028D10_DISABLE_VIEWPORT_CLAMP              0xFFFEFFFF
-#define   S_028D10_IGNORE_SC_ZRANGE(x)                 (((x) & 0x1) << 17)
-#define   G_028D10_IGNORE_SC_ZRANGE(x)                 (((x) >> 17) & 0x1)
-#define   C_028D10_IGNORE_SC_ZRANGE                    0xFFFDFFFF
-#define R_028A40_VGT_GS_MODE                         0x028A40
-#define   S_028A40_MODE(x)                             (((x) & 0x3) << 0)
-#define   G_028A40_MODE(x)                             (((x) >> 0) & 0x3)
-#define   C_028A40_MODE                                0xFFFFFFFC
-#define   S_028A40_ES_PASSTHRU(x)                      (((x) & 0x1) << 2)
-#define   G_028A40_ES_PASSTHRU(x)                      (((x) >> 2) & 0x1)
-#define   C_028A40_ES_PASSTHRU                         0xFFFFFFFB
-#define   S_028A40_CUT_MODE(x)                         (((x) & 0x3) << 3)
-#define   G_028A40_CUT_MODE(x)                         (((x) >> 3) & 0x3)
-#define   C_028A40_CUT_MODE                            0xFFFFFFE7
-#define R_008DFC_SQ_CF_WORD0                         0x008DFC
-#define   S_008DFC_ADDR(x)                             (((x) & 0xFFFFFFFF) << 0)
-#define   G_008DFC_ADDR(x)                             (((x) >> 0) & 0xFFFFFFFF)
-#define   C_008DFC_ADDR                                0x00000000
-#define R_008DFC_SQ_CF_WORD1                         0x008DFC
-#define   S_008DFC_POP_COUNT(x)                        (((x) & 0x7) << 0)
-#define   G_008DFC_POP_COUNT(x)                        (((x) >> 0) & 0x7)
-#define   C_008DFC_POP_COUNT                           0xFFFFFFF8
-#define   S_008DFC_CF_CONST(x)                         (((x) & 0x1F) << 3)
-#define   G_008DFC_CF_CONST(x)                         (((x) >> 3) & 0x1F)
-#define   C_008DFC_CF_CONST                            0xFFFFFF07
-#define   S_008DFC_COND(x)                             (((x) & 0x3) << 8)
-#define   G_008DFC_COND(x)                             (((x) >> 8) & 0x3)
-#define   C_008DFC_COND                                0xFFFFFCFF
-#define   S_008DFC_COUNT(x)                            (((x) & 0x7) << 10)
-#define   G_008DFC_COUNT(x)                            (((x) >> 10) & 0x7)
-#define   C_008DFC_COUNT                               0xFFFFE3FF
-#define   S_008DFC_CALL_COUNT(x)                       (((x) & 0x3F) << 13)
-#define   G_008DFC_CALL_COUNT(x)                       (((x) >> 13) & 0x3F)
-#define   C_008DFC_CALL_COUNT                          0xFFF81FFF
-#define   S_008DFC_END_OF_PROGRAM(x)                   (((x) & 0x1) << 21)
-#define   G_008DFC_END_OF_PROGRAM(x)                   (((x) >> 21) & 0x1)
-#define   C_008DFC_END_OF_PROGRAM                      0xFFDFFFFF
-#define   S_008DFC_VALID_PIXEL_MODE(x)                 (((x) & 0x1) << 22)
-#define   G_008DFC_VALID_PIXEL_MODE(x)                 (((x) >> 22) & 0x1)
-#define   C_008DFC_VALID_PIXEL_MODE                    0xFFBFFFFF
-#define   S_008DFC_CF_INST(x)                          (((x) & 0x7F) << 23)
-#define   G_008DFC_CF_INST(x)                          (((x) >> 23) & 0x7F)
-#define   C_008DFC_CF_INST                             0xC07FFFFF
-#define     V_008DFC_SQ_CF_INST_NOP                    0x00000000
-#define     V_008DFC_SQ_CF_INST_TEX                    0x00000001
-#define     V_008DFC_SQ_CF_INST_VTX                    0x00000002
-#define     V_008DFC_SQ_CF_INST_VTX_TC                 0x00000003
-#define     V_008DFC_SQ_CF_INST_LOOP_START             0x00000004
-#define     V_008DFC_SQ_CF_INST_LOOP_END               0x00000005
-#define     V_008DFC_SQ_CF_INST_LOOP_START_DX10        0x00000006
-#define     V_008DFC_SQ_CF_INST_LOOP_START_NO_AL       0x00000007
-#define     V_008DFC_SQ_CF_INST_LOOP_CONTINUE          0x00000008
-#define     V_008DFC_SQ_CF_INST_LOOP_BREAK             0x00000009
-#define     V_008DFC_SQ_CF_INST_JUMP                   0x0000000A
-#define     V_008DFC_SQ_CF_INST_PUSH                   0x0000000B
-#define     V_008DFC_SQ_CF_INST_PUSH_ELSE              0x0000000C
-#define     V_008DFC_SQ_CF_INST_ELSE                   0x0000000D
-#define     V_008DFC_SQ_CF_INST_POP                    0x0000000E
-#define     V_008DFC_SQ_CF_INST_POP_JUMP               0x0000000F
-#define     V_008DFC_SQ_CF_INST_POP_PUSH               0x00000010
-#define     V_008DFC_SQ_CF_INST_POP_PUSH_ELSE          0x00000011
-#define     V_008DFC_SQ_CF_INST_CALL                   0x00000012
-#define     V_008DFC_SQ_CF_INST_CALL_FS                0x00000013
-#define     V_008DFC_SQ_CF_INST_RETURN                 0x00000014
-#define     V_008DFC_SQ_CF_INST_EMIT_VERTEX            0x00000015
-#define     V_008DFC_SQ_CF_INST_EMIT_CUT_VERTEX        0x00000016
-#define     V_008DFC_SQ_CF_INST_CUT_VERTEX             0x00000017
-#define     V_008DFC_SQ_CF_INST_KILL                   0x00000018
-#define   S_008DFC_WHOLE_QUAD_MODE(x)                  (((x) & 0x1) << 30)
-#define   G_008DFC_WHOLE_QUAD_MODE(x)                  (((x) >> 30) & 0x1)
-#define   C_008DFC_WHOLE_QUAD_MODE                     0xBFFFFFFF
-#define   S_008DFC_BARRIER(x)                          (((x) & 0x1) << 31)
-#define   G_008DFC_BARRIER(x)                          (((x) >> 31) & 0x1)
-#define   C_008DFC_BARRIER                             0x7FFFFFFF
-#define R_008DFC_SQ_CF_ALU_WORD0                     0x008DFC
-#define   S_008DFC_ALU_ADDR(x)                         (((x) & 0x3FFFFF) << 0)
-#define   G_008DFC_ALU_ADDR(x)                         (((x) >> 0) & 0x3FFFFF)
-#define   C_008DFC_ALU_ADDR                            0xFFC00000
-#define   S_008DFC_KCACHE_BANK0(x)                     (((x) & 0xF) << 22)
-#define   G_008DFC_KCACHE_BANK0(x)                     (((x) >> 22) & 0xF)
-#define   C_008DFC_KCACHE_BANK0                        0xFC3FFFFF
-#define   S_008DFC_KCACHE_BANK1(x)                     (((x) & 0xF) << 26)
-#define   G_008DFC_KCACHE_BANK1(x)                     (((x) >> 26) & 0xF)
-#define   C_008DFC_KCACHE_BANK1                        0xC3FFFFFF
-#define   S_008DFC_KCACHE_MODE0(x)                     (((x) & 0x3) << 30)
-#define   G_008DFC_KCACHE_MODE0(x)                     (((x) >> 30) & 0x3)
-#define   C_008DFC_KCACHE_MODE0                        0x3FFFFFFF
-#define R_008DFC_SQ_CF_ALU_WORD1                     0x008DFC
-#define   S_008DFC_KCACHE_MODE1(x)                     (((x) & 0x3) << 0)
-#define   G_008DFC_KCACHE_MODE1(x)                     (((x) >> 0) & 0x3)
-#define   C_008DFC_KCACHE_MODE1                        0xFFFFFFFC
-#define   S_008DFC_KCACHE_ADDR0(x)                     (((x) & 0xFF) << 2)
-#define   G_008DFC_KCACHE_ADDR0(x)                     (((x) >> 2) & 0xFF)
-#define   C_008DFC_KCACHE_ADDR0                        0xFFFFFC03
-#define   S_008DFC_KCACHE_ADDR1(x)                     (((x) & 0xFF) << 10)
-#define   G_008DFC_KCACHE_ADDR1(x)                     (((x) >> 10) & 0xFF)
-#define   C_008DFC_KCACHE_ADDR1                        0xFFFC03FF
-#define   S_008DFC_ALU_COUNT(x)                        (((x) & 0x7F) << 18)
-#define   G_008DFC_ALU_COUNT(x)                        (((x) >> 18) & 0x7F)
-#define   C_008DFC_ALU_COUNT                           0xFE03FFFF
-#define   S_008DFC_USES_WATERFALL(x)                   (((x) & 0x1) << 25)
-#define   G_008DFC_USES_WATERFALL(x)                   (((x) >> 25) & 0x1)
-#define   C_008DFC_USES_WATERFALL                      0xFDFFFFFF
-#define   S_008DFC_CF_ALU_INST(x)                      (((x) & 0xF) << 26)
-#define   G_008DFC_CF_ALU_INST(x)                      (((x) >> 26) & 0xF)
-#define   C_008DFC_CF_ALU_INST                         0xC3FFFFFF
-#define     V_008DFC_SQ_CF_INST_ALU                    0x00000008
-#define     V_008DFC_SQ_CF_INST_ALU_PUSH_BEFORE        0x00000009
-#define     V_008DFC_SQ_CF_INST_ALU_POP_AFTER          0x0000000A
-#define     V_008DFC_SQ_CF_INST_ALU_POP2_AFTER         0x0000000B
-#define     V_008DFC_SQ_CF_INST_ALU_CONTINUE           0x0000000D
-#define     V_008DFC_SQ_CF_INST_ALU_BREAK              0x0000000E
-#define     V_008DFC_SQ_CF_INST_ALU_ELSE_AFTER         0x0000000F
-#define   S_008DFC_WHOLE_QUAD_MODE(x)                  (((x) & 0x1) << 30)
-#define   G_008DFC_WHOLE_QUAD_MODE(x)                  (((x) >> 30) & 0x1)
-#define   C_008DFC_WHOLE_QUAD_MODE                     0xBFFFFFFF
-#define   S_008DFC_BARRIER(x)                          (((x) & 0x1) << 31)
-#define   G_008DFC_BARRIER(x)                          (((x) >> 31) & 0x1)
-#define   C_008DFC_BARRIER                             0x7FFFFFFF
-#define R_008DFC_SQ_CF_ALLOC_EXPORT_WORD0            0x008DFC
-#define   S_008DFC_ARRAY_BASE(x)                       (((x) & 0x1FFF) << 0)
-#define   G_008DFC_ARRAY_BASE(x)                       (((x) >> 0) & 0x1FFF)
-#define   C_008DFC_ARRAY_BASE                          0xFFFFE000
-#define   S_008DFC_TYPE(x)                             (((x) & 0x3) << 13)
-#define   G_008DFC_TYPE(x)                             (((x) >> 13) & 0x3)
-#define   C_008DFC_TYPE                                0xFFFF9FFF
-#define   S_008DFC_RW_GPR(x)                           (((x) & 0x7F) << 15)
-#define   G_008DFC_RW_GPR(x)                           (((x) >> 15) & 0x7F)
-#define   C_008DFC_RW_GPR                              0xFFC07FFF
-#define   S_008DFC_RW_REL(x)                           (((x) & 0x1) << 22)
-#define   G_008DFC_RW_REL(x)                           (((x) >> 22) & 0x1)
-#define   C_008DFC_RW_REL                              0xFFBFFFFF
-#define   S_008DFC_INDEX_GPR(x)                        (((x) & 0x7F) << 23)
-#define   G_008DFC_INDEX_GPR(x)                        (((x) >> 23) & 0x7F)
-#define   C_008DFC_INDEX_GPR                           0xC07FFFFF
-#define   S_008DFC_ELEM_SIZE(x)                        (((x) & 0x3) << 30)
-#define   G_008DFC_ELEM_SIZE(x)                        (((x) >> 30) & 0x3)
-#define   C_008DFC_ELEM_SIZE                           0x3FFFFFFF
-#define R_008DFC_SQ_CF_ALLOC_EXPORT_WORD1            0x008DFC
-#define   S_008DFC_BURST_COUNT(x)                      (((x) & 0xF) << 17)
-#define   G_008DFC_BURST_COUNT(x)                      (((x) >> 17) & 0xF)
-#define   C_008DFC_BURST_COUNT                         0xFFE1FFFF
-#define   S_008DFC_END_OF_PROGRAM(x)                   (((x) & 0x1) << 21)
-#define   G_008DFC_END_OF_PROGRAM(x)                   (((x) >> 21) & 0x1)
-#define   C_008DFC_END_OF_PROGRAM                      0xFFDFFFFF
-#define   S_008DFC_VALID_PIXEL_MODE(x)                 (((x) & 0x1) << 22)
-#define   G_008DFC_VALID_PIXEL_MODE(x)                 (((x) >> 22) & 0x1)
-#define   C_008DFC_VALID_PIXEL_MODE                    0xFFBFFFFF
-#define   S_008DFC_CF_INST(x)                          (((x) & 0x7F) << 23)
-#define   G_008DFC_CF_INST(x)                          (((x) >> 23) & 0x7F)
-#define   C_008DFC_CF_INST                             0xC07FFFFF
-#define     V_008DFC_SQ_CF_INST_MEM_STREAM0            0x00000020
-#define     V_008DFC_SQ_CF_INST_MEM_STREAM1            0x00000021
-#define     V_008DFC_SQ_CF_INST_MEM_STREAM2            0x00000022
-#define     V_008DFC_SQ_CF_INST_MEM_STREAM3            0x00000023
-#define     V_008DFC_SQ_CF_INST_MEM_SCRATCH            0x00000024
-#define     V_008DFC_SQ_CF_INST_MEM_REDUCTION          0x00000025
-#define     V_008DFC_SQ_CF_INST_MEM_RING               0x00000026
-#define     V_008DFC_SQ_CF_INST_EXPORT                 0x00000027
-#define     V_008DFC_SQ_CF_INST_EXPORT_DONE            0x00000028
-#define   S_008DFC_WHOLE_QUAD_MODE(x)                  (((x) & 0x1) << 30)
-#define   G_008DFC_WHOLE_QUAD_MODE(x)                  (((x) >> 30) & 0x1)
-#define   C_008DFC_WHOLE_QUAD_MODE                     0xBFFFFFFF
-#define   S_008DFC_BARRIER(x)                          (((x) & 0x1) << 31)
-#define   G_008DFC_BARRIER(x)                          (((x) >> 31) & 0x1)
-#define   C_008DFC_BARRIER                             0x7FFFFFFF
-#define R_008DFC_SQ_CF_ALLOC_EXPORT_WORD1_BUF        0x008DFC
-#define   S_008DFC_ARRAY_SIZE(x)                       (((x) & 0xFFF) << 0)
-#define   G_008DFC_ARRAY_SIZE(x)                       (((x) >> 0) & 0xFFF)
-#define   C_008DFC_ARRAY_SIZE                          0xFFFFF000
-#define   S_008DFC_COMP_MASK(x)                        (((x) & 0xF) << 12)
-#define   G_008DFC_COMP_MASK(x)                        (((x) >> 12) & 0xF)
-#define   C_008DFC_COMP_MASK                           0xFFFF0FFF
-#define R_008DFC_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ       0x008DFC
-#define   S_008DFC_SEL_X(x)                            (((x) & 0x7) << 0)
-#define   G_008DFC_SEL_X(x)                            (((x) >> 0) & 0x7)
-#define   C_008DFC_SEL_X                               0xFFFFFFF8
-#define   S_008DFC_SEL_Y(x)                            (((x) & 0x7) << 3)
-#define   G_008DFC_SEL_Y(x)                            (((x) >> 3) & 0x7)
-#define   C_008DFC_SEL_Y                               0xFFFFFFC7
-#define   S_008DFC_SEL_Z(x)                            (((x) & 0x7) << 6)
-#define   G_008DFC_SEL_Z(x)                            (((x) >> 6) & 0x7)
-#define   C_008DFC_SEL_Z                               0xFFFFFE3F
-#define   S_008DFC_SEL_W(x)                            (((x) & 0x7) << 9)
-#define   G_008DFC_SEL_W(x)                            (((x) >> 9) & 0x7)
-#define   C_008DFC_SEL_W                               0xFFFFF1FF
-#define R_008DFC_SQ_VTX_WORD0                        0x008DFC
-#define   S_008DFC_VTX_INST(x)                         (((x) & 0x1F) << 0)
-#define   G_008DFC_VTX_INST(x)                         (((x) >> 0) & 0x1F)
-#define   C_008DFC_VTX_INST                            0xFFFFFFE0
-#define   S_008DFC_FETCH_TYPE(x)                       (((x) & 0x3) << 5)
-#define   G_008DFC_FETCH_TYPE(x)                       (((x) >> 5) & 0x3)
-#define   C_008DFC_FETCH_TYPE                          0xFFFFFF9F
-#define   S_008DFC_FETCH_WHOLE_QUAD(x)                 (((x) & 0x1) << 7)
-#define   G_008DFC_FETCH_WHOLE_QUAD(x)                 (((x) >> 7) & 0x1)
-#define   C_008DFC_FETCH_WHOLE_QUAD                    0xFFFFFF7F
-#define   S_008DFC_BUFFER_ID(x)                        (((x) & 0xFF) << 8)
-#define   G_008DFC_BUFFER_ID(x)                        (((x) >> 8) & 0xFF)
-#define   C_008DFC_BUFFER_ID                           0xFFFF00FF
-#define   S_008DFC_SRC_GPR(x)                          (((x) & 0x7F) << 16)
-#define   G_008DFC_SRC_GPR(x)                          (((x) >> 16) & 0x7F)
-#define   C_008DFC_SRC_GPR                             0xFF80FFFF
-#define   S_008DFC_SRC_REL(x)                          (((x) & 0x1) << 23)
-#define   G_008DFC_SRC_REL(x)                          (((x) >> 23) & 0x1)
-#define   C_008DFC_SRC_REL                             0xFF7FFFFF
-#define   S_008DFC_SRC_SEL_X(x)                        (((x) & 0x3) << 24)
-#define   G_008DFC_SRC_SEL_X(x)                        (((x) >> 24) & 0x3)
-#define   C_008DFC_SRC_SEL_X                           0xFCFFFFFF
-#define   S_008DFC_MEGA_FETCH_COUNT(x)                 (((x) & 0x3F) << 26)
-#define   G_008DFC_MEGA_FETCH_COUNT(x)                 (((x) >> 26) & 0x3F)
-#define   C_008DFC_MEGA_FETCH_COUNT                    0x03FFFFFF
-#define R_008DFC_SQ_VTX_WORD1                        0x008DFC
-#define   S_008DFC_DST_SEL_X(x)                        (((x) & 0x7) << 9)
-#define   G_008DFC_DST_SEL_X(x)                        (((x) >> 9) & 0x7)
-#define   C_008DFC_DST_SEL_X                           0xFFFFF1FF
-#define   S_008DFC_DST_SEL_Y(x)                        (((x) & 0x7) << 12)
-#define   G_008DFC_DST_SEL_Y(x)                        (((x) >> 12) & 0x7)
-#define   C_008DFC_DST_SEL_Y                           0xFFFF8FFF
-#define   S_008DFC_DST_SEL_Z(x)                        (((x) & 0x7) << 15)
-#define   G_008DFC_DST_SEL_Z(x)                        (((x) >> 15) & 0x7)
-#define   C_008DFC_DST_SEL_Z                           0xFFFC7FFF
-#define   S_008DFC_DST_SEL_W(x)                        (((x) & 0x7) << 18)
-#define   G_008DFC_DST_SEL_W(x)                        (((x) >> 18) & 0x7)
-#define   C_008DFC_DST_SEL_W                           0xFFE3FFFF
-#define   S_008DFC_USE_CONST_FIELDS(x)                 (((x) & 0x1) << 21)
-#define   G_008DFC_USE_CONST_FIELDS(x)                 (((x) >> 21) & 0x1)
-#define   C_008DFC_USE_CONST_FIELDS                    0xFFDFFFFF
-#define   S_008DFC_DATA_FORMAT(x)                      (((x) & 0x3F) << 22)
-#define   G_008DFC_DATA_FORMAT(x)                      (((x) >> 22) & 0x3F)
-#define   C_008DFC_DATA_FORMAT                         0xF03FFFFF
-#define   S_008DFC_NUM_FORMAT_ALL(x)                   (((x) & 0x3) << 28)
-#define   G_008DFC_NUM_FORMAT_ALL(x)                   (((x) >> 28) & 0x3)
-#define   C_008DFC_NUM_FORMAT_ALL                      0xCFFFFFFF
-#define   S_008DFC_FORMAT_COMP_ALL(x)                  (((x) & 0x1) << 30)
-#define   G_008DFC_FORMAT_COMP_ALL(x)                  (((x) >> 30) & 0x1)
-#define   C_008DFC_FORMAT_COMP_ALL                     0xBFFFFFFF
-#define   S_008DFC_SRF_MODE_ALL(x)                     (((x) & 0x1) << 31)
-#define   G_008DFC_SRF_MODE_ALL(x)                     (((x) >> 31) & 0x1)
-#define   C_008DFC_SRF_MODE_ALL                        0x7FFFFFFF
-#define R_008DFC_SQ_VTX_WORD1_GPR                    0x008DFC
-#define   S_008DFC_DST_GPR(x)                          (((x) & 0x7F) << 0)
-#define   G_008DFC_DST_GPR(x)                          (((x) >> 0) & 0x7F)
-#define   C_008DFC_DST_GPR                             0xFFFFFF80
-#define   S_008DFC_DST_REL(x)                          (((x) & 0x1) << 7)
-#define   G_008DFC_DST_REL(x)                          (((x) >> 7) & 0x1)
-#define   C_008DFC_DST_REL                             0xFFFFFF7F
-#define R_008DFC_SQ_VTX_WORD2                        0x008DFC
-#define   S_008DFC_OFFSET(x)                           (((x) & 0xFFFF) << 0)
-#define   G_008DFC_OFFSET(x)                           (((x) >> 0) & 0xFFFF)
-#define   C_008DFC_OFFSET                              0xFFFF0000
-#define   S_008DFC_ENDIAN_SWAP(x)                      (((x) & 0x3) << 16)
-#define   G_008DFC_ENDIAN_SWAP(x)                      (((x) >> 16) & 0x3)
-#define   C_008DFC_ENDIAN_SWAP                         0xFFFCFFFF
-#define   S_008DFC_CONST_BUF_NO_STRIDE(x)              (((x) & 0x1) << 18)
-#define   G_008DFC_CONST_BUF_NO_STRIDE(x)              (((x) >> 18) & 0x1)
-#define   C_008DFC_CONST_BUF_NO_STRIDE                 0xFFFBFFFF
-#define   S_008DFC_MEGA_FETCH(x)                       (((x) & 0x1) << 19)
-#define   G_008DFC_MEGA_FETCH(x)                       (((x) >> 19) & 0x1)
-#define   C_008DFC_MEGA_FETCH                          0xFFF7FFFF
-#define   S_008DFC_ALT_CONST(x)                        (((x) & 0x1) << 20)
-#define   G_008DFC_ALT_CONST(x)                        (((x) >> 20) & 0x1)
-#define   C_008DFC_ALT_CONST                           0xFFEFFFFF
-#define R_008040_WAIT_UNTIL                          0x008040
-#define   S_008040_WAIT_CP_DMA_IDLE(x)                 (((x) & 0x1) << 8)
-#define   G_008040_WAIT_CP_DMA_IDLE(x)                 (((x) >> 8) & 0x1)
-#define   C_008040_WAIT_CP_DMA_IDLE                    0xFFFFFEFF
-#define   S_008040_WAIT_CMDFIFO(x)                     (((x) & 0x1) << 10)
-#define   G_008040_WAIT_CMDFIFO(x)                     (((x) >> 10) & 0x1)
-#define   C_008040_WAIT_CMDFIFO                        0xFFFFFBFF
-#define   S_008040_WAIT_2D_IDLE(x)                     (((x) & 0x1) << 14)
-#define   G_008040_WAIT_2D_IDLE(x)                     (((x) >> 14) & 0x1)
-#define   C_008040_WAIT_2D_IDLE                        0xFFFFBFFF
-#define   S_008040_WAIT_3D_IDLE(x)                     (((x) & 0x1) << 15)
-#define   G_008040_WAIT_3D_IDLE(x)                     (((x) >> 15) & 0x1)
-#define   C_008040_WAIT_3D_IDLE                        0xFFFF7FFF
-#define   S_008040_WAIT_2D_IDLECLEAN(x)                (((x) & 0x1) << 16)
-#define   G_008040_WAIT_2D_IDLECLEAN(x)                (((x) >> 16) & 0x1)
-#define   C_008040_WAIT_2D_IDLECLEAN                   0xFFFEFFFF
-#define   S_008040_WAIT_3D_IDLECLEAN(x)                (((x) & 0x1) << 17)
-#define   G_008040_WAIT_3D_IDLECLEAN(x)                (((x) >> 17) & 0x1)
-#define   C_008040_WAIT_3D_IDLECLEAN                   0xFFFDFFFF
-#define   S_008040_WAIT_EXTERN_SIG(x)                  (((x) & 0x1) << 19)
-#define   G_008040_WAIT_EXTERN_SIG(x)                  (((x) >> 19) & 0x1)
-#define   C_008040_WAIT_EXTERN_SIG                     0xFFF7FFFF
-#define   S_008040_CMDFIFO_ENTRIES(x)                  (((x) & 0x1F) << 20)
-#define   G_008040_CMDFIFO_ENTRIES(x)                  (((x) >> 20) & 0x1F)
-#define   C_008040_CMDFIFO_ENTRIES                     0xFE0FFFFF
-#define R_0286CC_SPI_PS_IN_CONTROL_0                 0x0286CC
-#define   S_0286CC_NUM_INTERP(x)                       (((x) & 0x3F) << 0)
-#define   G_0286CC_NUM_INTERP(x)                       (((x) >> 0) & 0x3F)
-#define   C_0286CC_NUM_INTERP                          0xFFFFFFC0
-#define   S_0286CC_POSITION_ENA(x)                     (((x) & 0x1) << 8)
-#define   G_0286CC_POSITION_ENA(x)                     (((x) >> 8) & 0x1)
-#define   C_0286CC_POSITION_ENA                        0xFFFFFEFF
-#define   S_0286CC_POSITION_CENTROID(x)                (((x) & 0x1) << 9)
-#define   G_0286CC_POSITION_CENTROID(x)                (((x) >> 9) & 0x1)
-#define   C_0286CC_POSITION_CENTROID                   0xFFFFFDFF
-#define   S_0286CC_POSITION_ADDR(x)                    (((x) & 0x1F) << 10)
-#define   G_0286CC_POSITION_ADDR(x)                    (((x) >> 10) & 0x1F)
-#define   C_0286CC_POSITION_ADDR                       0xFFFF83FF
-#define   S_0286CC_PARAM_GEN(x)                        (((x) & 0xF) << 15)
-#define   G_0286CC_PARAM_GEN(x)                        (((x) >> 15) & 0xF)
-#define   C_0286CC_PARAM_GEN                           0xFFF87FFF
-#define   S_0286CC_PARAM_GEN_ADDR(x)                   (((x) & 0x7F) << 19)
-#define   G_0286CC_PARAM_GEN_ADDR(x)                   (((x) >> 19) & 0x7F)
-#define   C_0286CC_PARAM_GEN_ADDR                      0xFC07FFFF
-#define   S_0286CC_BARYC_SAMPLE_CNTL(x)                (((x) & 0x3) << 26)
-#define   G_0286CC_BARYC_SAMPLE_CNTL(x)                (((x) >> 26) & 0x3)
-#define   C_0286CC_BARYC_SAMPLE_CNTL                   0xF3FFFFFF
-#define   S_0286CC_PERSP_GRADIENT_ENA(x)               (((x) & 0x1) << 28)
-#define   G_0286CC_PERSP_GRADIENT_ENA(x)               (((x) >> 28) & 0x1)
-#define   C_0286CC_PERSP_GRADIENT_ENA                  0xEFFFFFFF
-#define   S_0286CC_LINEAR_GRADIENT_ENA(x)              (((x) & 0x1) << 29)
-#define   G_0286CC_LINEAR_GRADIENT_ENA(x)              (((x) >> 29) & 0x1)
-#define   C_0286CC_LINEAR_GRADIENT_ENA                 0xDFFFFFFF
-#define   S_0286CC_POSITION_SAMPLE(x)                  (((x) & 0x1) << 30)
-#define   G_0286CC_POSITION_SAMPLE(x)                  (((x) >> 30) & 0x1)
-#define   C_0286CC_POSITION_SAMPLE                     0xBFFFFFFF
-#define   S_0286CC_BARYC_AT_SAMPLE_ENA(x)              (((x) & 0x1) << 31)
-#define   G_0286CC_BARYC_AT_SAMPLE_ENA(x)              (((x) >> 31) & 0x1)
-#define   C_0286CC_BARYC_AT_SAMPLE_ENA                 0x7FFFFFFF
-#define R_0286D0_SPI_PS_IN_CONTROL_1                 0x0286D0
-#define   S_0286D0_GEN_INDEX_PIX(x)                    (((x) & 0x1) << 0)
-#define   G_0286D0_GEN_INDEX_PIX(x)                    (((x) >> 0) & 0x1)
-#define   C_0286D0_GEN_INDEX_PIX                       0xFFFFFFFE
-#define   S_0286D0_GEN_INDEX_PIX_ADDR(x)               (((x) & 0x7F) << 1)
-#define   G_0286D0_GEN_INDEX_PIX_ADDR(x)               (((x) >> 1) & 0x7F)
-#define   C_0286D0_GEN_INDEX_PIX_ADDR                  0xFFFFFF01
-#define   S_0286D0_FRONT_FACE_ENA(x)                   (((x) & 0x1) << 8)
-#define   G_0286D0_FRONT_FACE_ENA(x)                   (((x) >> 8) & 0x1)
-#define   C_0286D0_FRONT_FACE_ENA                      0xFFFFFEFF
-#define   S_0286D0_FRONT_FACE_CHAN(x)                  (((x) & 0x3) << 9)
-#define   G_0286D0_FRONT_FACE_CHAN(x)                  (((x) >> 9) & 0x3)
-#define   C_0286D0_FRONT_FACE_CHAN                     0xFFFFF9FF
-#define   S_0286D0_FRONT_FACE_ALL_BITS(x)              (((x) & 0x1) << 11)
-#define   G_0286D0_FRONT_FACE_ALL_BITS(x)              (((x) >> 11) & 0x1)
-#define   C_0286D0_FRONT_FACE_ALL_BITS                 0xFFFFF7FF
-#define   S_0286D0_FRONT_FACE_ADDR(x)                  (((x) & 0x1F) << 12)
-#define   G_0286D0_FRONT_FACE_ADDR(x)                  (((x) >> 12) & 0x1F)
-#define   C_0286D0_FRONT_FACE_ADDR                     0xFFFE0FFF
-#define   S_0286D0_FOG_ADDR(x)                         (((x) & 0x7F) << 17)
-#define   G_0286D0_FOG_ADDR(x)                         (((x) >> 17) & 0x7F)
-#define   C_0286D0_FOG_ADDR                            0xFF01FFFF
-#define   S_0286D0_FIXED_PT_POSITION_ENA(x)            (((x) & 0x1) << 24)
-#define   G_0286D0_FIXED_PT_POSITION_ENA(x)            (((x) >> 24) & 0x1)
-#define   C_0286D0_FIXED_PT_POSITION_ENA               0xFEFFFFFF
-#define   S_0286D0_FIXED_PT_POSITION_ADDR(x)           (((x) & 0x1F) << 25)
-#define   G_0286D0_FIXED_PT_POSITION_ADDR(x)           (((x) >> 25) & 0x1F)
-#define   C_0286D0_FIXED_PT_POSITION_ADDR              0xC1FFFFFF
-#define R_0286C4_SPI_VS_OUT_CONFIG                   0x0286C4
-#define   S_0286C4_VS_PER_COMPONENT(x)                 (((x) & 0x1) << 0)
-#define   G_0286C4_VS_PER_COMPONENT(x)                 (((x) >> 0) & 0x1)
-#define   C_0286C4_VS_PER_COMPONENT                    0xFFFFFFFE
-#define   S_0286C4_VS_EXPORT_COUNT(x)                  (((x) & 0x1F) << 1)
-#define   G_0286C4_VS_EXPORT_COUNT(x)                  (((x) >> 1) & 0x1F)
-#define   C_0286C4_VS_EXPORT_COUNT                     0xFFFFFFC1
-#define   S_0286C4_VS_EXPORTS_FOG(x)                   (((x) & 0x1) << 8)
-#define   G_0286C4_VS_EXPORTS_FOG(x)                   (((x) >> 8) & 0x1)
-#define   C_0286C4_VS_EXPORTS_FOG                      0xFFFFFEFF
-#define   S_0286C4_VS_OUT_FOG_VEC_ADDR(x)              (((x) & 0x1F) << 9)
-#define   G_0286C4_VS_OUT_FOG_VEC_ADDR(x)              (((x) >> 9) & 0x1F)
-#define   C_0286C4_VS_OUT_FOG_VEC_ADDR                 0xFFFFC1FF
-#define R_028240_PA_SC_GENERIC_SCISSOR_TL            0x028240
-#define   S_028240_TL_X(x)                             (((x) & 0x3FFF) << 0)
-#define   G_028240_TL_X(x)                             (((x) >> 0) & 0x3FFF)
-#define   C_028240_TL_X                                0xFFFFC000
-#define   S_028240_TL_Y(x)                             (((x) & 0x3FFF) << 16)
-#define   G_028240_TL_Y(x)                             (((x) >> 16) & 0x3FFF)
-#define   C_028240_TL_Y                                0xC000FFFF
-#define   S_028240_WINDOW_OFFSET_DISABLE(x)            (((x) & 0x1) << 31)
-#define   G_028240_WINDOW_OFFSET_DISABLE(x)            (((x) >> 31) & 0x1)
-#define   C_028240_WINDOW_OFFSET_DISABLE               0x7FFFFFFF
-#define R_028244_PA_SC_GENERIC_SCISSOR_BR            0x028244
-#define   S_028244_BR_X(x)                             (((x) & 0x3FFF) << 0)
-#define   G_028244_BR_X(x)                             (((x) >> 0) & 0x3FFF)
-#define   C_028244_BR_X                                0xFFFFC000
-#define   S_028244_BR_Y(x)                             (((x) & 0x3FFF) << 16)
-#define   G_028244_BR_Y(x)                             (((x) >> 16) & 0x3FFF)
-#define   C_028244_BR_Y                                0xC000FFFF
-#define R_028030_PA_SC_SCREEN_SCISSOR_TL             0x028030
-#define   S_028030_TL_X(x)                             (((x) & 0x7FFF) << 0)
-#define   G_028030_TL_X(x)                             (((x) >> 0) & 0x7FFF)
-#define   C_028030_TL_X                                0xFFFF8000
-#define   S_028030_TL_Y(x)                             (((x) & 0x7FFF) << 16)
-#define   G_028030_TL_Y(x)                             (((x) >> 16) & 0x7FFF)
-#define   C_028030_TL_Y                                0x8000FFFF
-#define R_028034_PA_SC_SCREEN_SCISSOR_BR             0x028034
-#define   S_028034_BR_X(x)                             (((x) & 0x7FFF) << 0)
-#define   G_028034_BR_X(x)                             (((x) >> 0) & 0x7FFF)
-#define   C_028034_BR_X                                0xFFFF8000
-#define   S_028034_BR_Y(x)                             (((x) & 0x7FFF) << 16)
-#define   G_028034_BR_Y(x)                             (((x) >> 16) & 0x7FFF)
-#define   C_028034_BR_Y                                0x8000FFFF
-#define R_028204_PA_SC_WINDOW_SCISSOR_TL             0x028204
-#define   S_028204_TL_X(x)                             (((x) & 0x3FFF) << 0)
-#define   G_028204_TL_X(x)                             (((x) >> 0) & 0x3FFF)
-#define   C_028204_TL_X                                0xFFFFC000
-#define   S_028204_TL_Y(x)                             (((x) & 0x3FFF) << 16)
-#define   G_028204_TL_Y(x)                             (((x) >> 16) & 0x3FFF)
-#define   C_028204_TL_Y                                0xC000FFFF
-#define   S_028204_WINDOW_OFFSET_DISABLE(x)            (((x) & 0x1) << 31)
-#define   G_028204_WINDOW_OFFSET_DISABLE(x)            (((x) >> 31) & 0x1)
-#define   C_028204_WINDOW_OFFSET_DISABLE               0x7FFFFFFF
-#define R_028208_PA_SC_WINDOW_SCISSOR_BR             0x028208
-#define   S_028208_BR_X(x)                             (((x) & 0x3FFF) << 0)
-#define   G_028208_BR_X(x)                             (((x) >> 0) & 0x3FFF)
-#define   C_028208_BR_X                                0xFFFFC000
-#define   S_028208_BR_Y(x)                             (((x) & 0x3FFF) << 16)
-#define   G_028208_BR_Y(x)                             (((x) >> 16) & 0x3FFF)
-#define   C_028208_BR_Y                                0xC000FFFF
-#define R_0287F0_VGT_DRAW_INITIATOR                  0x0287F0
-#define   S_0287F0_SOURCE_SELECT(x)                    (((x) & 0x3) << 0)
-#define   G_0287F0_SOURCE_SELECT(x)                    (((x) >> 0) & 0x3)
-#define   C_0287F0_SOURCE_SELECT                       0xFFFFFFFC
-#define   S_0287F0_MAJOR_MODE(x)                       (((x) & 0x3) << 2)
-#define   G_0287F0_MAJOR_MODE(x)                       (((x) >> 2) & 0x3)
-#define   C_0287F0_MAJOR_MODE                          0xFFFFFFF3
-#define   S_0287F0_SPRITE_EN(x)                        (((x) & 0x1) << 4)
-#define   G_0287F0_SPRITE_EN(x)                        (((x) >> 4) & 0x1)
-#define   C_0287F0_SPRITE_EN                           0xFFFFFFEF
-#define   S_0287F0_NOT_EOP(x)                          (((x) & 0x1) << 5)
-#define   G_0287F0_NOT_EOP(x)                          (((x) >> 5) & 0x1)
-#define   C_0287F0_NOT_EOP                             0xFFFFFFDF
-#define   S_0287F0_USE_OPAQUE(x)                       (((x) & 0x1) << 6)
-#define   G_0287F0_USE_OPAQUE(x)                       (((x) >> 6) & 0x1)
-#define   C_0287F0_USE_OPAQUE                          0xFFFFFFBF
-#define R_0280A0_CB_COLOR0_INFO                      0x0280A0
-#define R_0280A4_CB_COLOR1_INFO                      0x0280A4
-#define R_0280A8_CB_COLOR2_INFO                      0x0280A8
-#define R_0280AC_CB_COLOR3_INFO                      0x0280AC
-#define R_0280B0_CB_COLOR4_INFO                      0x0280B0
-#define R_0280B4_CB_COLOR5_INFO                      0x0280B4
-#define R_0280B8_CB_COLOR6_INFO                      0x0280B8
-#define R_0280BC_CB_COLOR7_INFO                      0x0280BC
-#define R_02800C_DB_DEPTH_BASE                       0x02800C
-#define R_028000_DB_DEPTH_SIZE                       0x028000
-#define R_028004_DB_DEPTH_VIEW                       0x028004
-#define R_028010_DB_DEPTH_INFO                       0x028010
-#define R_028D24_DB_HTILE_SURFACE                    0x028D24
-#define R_028D34_DB_PREFETCH_LIMIT                   0x028D34
-#define R_0286D4_SPI_INTERP_CONTROL_0                0x0286D4
-#define R_028A48_PA_SC_MPASS_PS_CNTL                 0x028A48
-#define R_028C00_PA_SC_LINE_CNTL                     0x028C00
-#define R_028C04_PA_SC_AA_CONFIG                     0x028C04
-#define R_028C08_PA_SU_VTX_CNTL                      0x028C08
-#define R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX           0x028C1C
-#define R_028C48_PA_SC_AA_MASK                       0x028C48
-#define R_028810_PA_CL_CLIP_CNTL                     0x028810
-#define R_02881C_PA_CL_VS_OUT_CNTL                   0x02881C
-#define R_028820_PA_CL_NANINF_CNTL                   0x028820
-#define R_028C0C_PA_CL_GB_VERT_CLIP_ADJ              0x028C0C
-#define R_028C10_PA_CL_GB_VERT_DISC_ADJ              0x028C10
-#define R_028C14_PA_CL_GB_HORZ_CLIP_ADJ              0x028C14
-#define R_028C18_PA_CL_GB_HORZ_DISC_ADJ              0x028C18
-#define R_028814_PA_SU_SC_MODE_CNTL                  0x028814
-#define R_028A00_PA_SU_POINT_SIZE                    0x028A00
-#define R_028A04_PA_SU_POINT_MINMAX                  0x028A04
-#define R_028A08_PA_SU_LINE_CNTL                     0x028A08
-#define R_028A0C_PA_SC_LINE_STIPPLE                  0x028A0C
-#define R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL       0x028DF8
-#define R_028DFC_PA_SU_POLY_OFFSET_CLAMP             0x028DFC
-#define R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE       0x028E00
-#define R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET      0x028E04
-#define R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE        0x028E08
-#define R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET       0x028E0C
-#define R_028818_PA_CL_VTE_CNTL                      0x028818
-#define R_02843C_PA_CL_VPORT_XSCALE_0                0x02843C
-#define R_028444_PA_CL_VPORT_YSCALE_0                0x028444
-#define R_02844C_PA_CL_VPORT_ZSCALE_0                0x02844C
-#define R_028440_PA_CL_VPORT_XOFFSET_0               0x028440
-#define R_028448_PA_CL_VPORT_YOFFSET_0               0x028448
-#define R_028450_PA_CL_VPORT_ZOFFSET_0               0x028450
-#define R_028250_PA_SC_VPORT_SCISSOR_0_TL            0x028250
-#define R_028254_PA_SC_VPORT_SCISSOR_0_BR            0x028254
-#define R_028780_CB_BLEND0_CONTROL                   0x028780
-#define R_028784_CB_BLEND1_CONTROL                   0x028784
-#define R_028788_CB_BLEND2_CONTROL                   0x028788
-#define R_02878C_CB_BLEND3_CONTROL                   0x02878C
-#define R_028790_CB_BLEND4_CONTROL                   0x028790
-#define R_028794_CB_BLEND5_CONTROL                   0x028794
-#define R_028798_CB_BLEND6_CONTROL                   0x028798
-#define R_02879C_CB_BLEND7_CONTROL                   0x02879C
-#define R_028804_CB_BLEND_CONTROL                    0x028804
-#define R_028028_DB_STENCIL_CLEAR                    0x028028
-#define R_02802C_DB_DEPTH_CLEAR                      0x02802C
-#define R_028430_DB_STENCILREFMASK                   0x028430
-#define R_028434_DB_STENCILREFMASK_BF                0x028434
-#define R_028800_DB_DEPTH_CONTROL                    0x028800
-#define R_02880C_DB_SHADER_CONTROL                   0x02880C
-#define R_028D0C_DB_RENDER_CONTROL                   0x028D0C
-#define   S_028D0C_DEPTH_CLEAR_ENABLE(x)               (((x) & 0x1) << 0)
-#define   S_028D0C_STENCIL_CLEAR_ENABLE(x)             (((x) & 0x1) << 1)
-#define   S_028D0C_DEPTH_COPY_ENABLE(x)                (((x) & 0x1) << 2)
-#define   S_028D0C_STENCIL_COPY_ENABLE(x)              (((x) & 0x1) << 3)
-#define   S_028D0C_RESUMMARIZE_ENABLE(x)               (((x) & 0x1) << 4)
-#define   S_028D0C_STENCIL_COMPRESS_DISABLE(x)         (((x) & 0x1) << 5)
-#define   S_028D0C_DEPTH_COMPRESS_DISABLE(x)           (((x) & 0x1) << 6)
-#define   S_028D0C_COPY_CENTROID(x)                    (((x) & 0x1) << 7)
-#define   S_028D0C_COPY_SAMPLE(x)                      (((x) & 0x1) << 8)
-#define   S_028D0C_R700_PERFECT_ZPASS_COUNTS(x)        (((x) & 0x1) << 15)
-#define R_028D10_DB_RENDER_OVERRIDE                  0x028D10
-#define R_028D2C_DB_SRESULTS_COMPARE_STATE1          0x028D2C
-#define R_028D30_DB_PRELOAD_CONTROL                  0x028D30
-#define R_028D44_DB_ALPHA_TO_MASK                    0x028D44
-#define R_028868_SQ_PGM_RESOURCES_VS                 0x028868
-#define R_0286CC_SPI_PS_IN_CONTROL_0                 0x0286CC
-#define R_0286D0_SPI_PS_IN_CONTROL_1                 0x0286D0
-#define R_028644_SPI_PS_INPUT_CNTL_0                 0x028644
-#define R_028648_SPI_PS_INPUT_CNTL_1                 0x028648
-#define R_02864C_SPI_PS_INPUT_CNTL_2                 0x02864C
-#define R_028650_SPI_PS_INPUT_CNTL_3                 0x028650
-#define R_028654_SPI_PS_INPUT_CNTL_4                 0x028654
-#define R_028658_SPI_PS_INPUT_CNTL_5                 0x028658
-#define R_02865C_SPI_PS_INPUT_CNTL_6                 0x02865C
-#define R_028660_SPI_PS_INPUT_CNTL_7                 0x028660
-#define R_028664_SPI_PS_INPUT_CNTL_8                 0x028664
-#define R_028668_SPI_PS_INPUT_CNTL_9                 0x028668
-#define R_02866C_SPI_PS_INPUT_CNTL_10                0x02866C
-#define R_028670_SPI_PS_INPUT_CNTL_11                0x028670
-#define R_028674_SPI_PS_INPUT_CNTL_12                0x028674
-#define R_028678_SPI_PS_INPUT_CNTL_13                0x028678
-#define R_02867C_SPI_PS_INPUT_CNTL_14                0x02867C
-#define R_028680_SPI_PS_INPUT_CNTL_15                0x028680
-#define R_028684_SPI_PS_INPUT_CNTL_16                0x028684
-#define R_028688_SPI_PS_INPUT_CNTL_17                0x028688
-#define R_02868C_SPI_PS_INPUT_CNTL_18                0x02868C
-#define R_028690_SPI_PS_INPUT_CNTL_19                0x028690
-#define R_028694_SPI_PS_INPUT_CNTL_20                0x028694
-#define R_028698_SPI_PS_INPUT_CNTL_21                0x028698
-#define R_02869C_SPI_PS_INPUT_CNTL_22                0x02869C
-#define R_0286A0_SPI_PS_INPUT_CNTL_23                0x0286A0
-#define R_0286A4_SPI_PS_INPUT_CNTL_24                0x0286A4
-#define R_0286A8_SPI_PS_INPUT_CNTL_25                0x0286A8
-#define R_0286AC_SPI_PS_INPUT_CNTL_26                0x0286AC
-#define R_0286B0_SPI_PS_INPUT_CNTL_27                0x0286B0
-#define R_0286B4_SPI_PS_INPUT_CNTL_28                0x0286B4
-#define R_0286B8_SPI_PS_INPUT_CNTL_29                0x0286B8
-#define R_0286BC_SPI_PS_INPUT_CNTL_30                0x0286BC
-#define R_0286C0_SPI_PS_INPUT_CNTL_31                0x0286C0
-#define R_028850_SQ_PGM_RESOURCES_PS                 0x028850
-#define R_028854_SQ_PGM_EXPORTS_PS                   0x028854
-#define R_008958_VGT_PRIMITIVE_TYPE                  0x008958
-#define R_028A7C_VGT_DMA_INDEX_TYPE                  0x028A7C
-#define R_028A88_VGT_DMA_NUM_INSTANCES               0x028A88
-#define R_008970_VGT_NUM_INDICES                     0x008970
-#define R_0287F0_VGT_DRAW_INITIATOR                  0x0287F0
-#define R_028238_CB_TARGET_MASK                      0x028238
-#define R_02823C_CB_SHADER_MASK                      0x02823C
-#define R_028060_CB_COLOR0_SIZE                      0x028060
-#define   S_028060_PITCH_TILE_MAX(x)                   (((x) & 0x3FF) << 0)
-#define   G_028060_PITCH_TILE_MAX(x)                   (((x) >> 0) & 0x3FF)
-#define   C_028060_PITCH_TILE_MAX                      0xFFFFFC00
-#define   S_028060_SLICE_TILE_MAX(x)                   (((x) & 0xFFFFF) << 10)
-#define   G_028060_SLICE_TILE_MAX(x)                   (((x) >> 10) & 0xFFFFF)
-#define   C_028060_SLICE_TILE_MAX                      0xC00003FF
-#define R_028064_CB_COLOR1_SIZE                      0x028064
-#define R_028068_CB_COLOR2_SIZE                      0x028068
-#define R_02806C_CB_COLOR3_SIZE                      0x02806C
-#define R_028070_CB_COLOR4_SIZE                      0x028070
-#define R_028074_CB_COLOR5_SIZE                      0x028074
-#define R_028078_CB_COLOR6_SIZE                      0x028078
-#define R_02807C_CB_COLOR7_SIZE                      0x02807C
-#define R_028040_CB_COLOR0_BASE                      0x028040
-#define R_028044_CB_COLOR1_BASE                      0x028044
-#define R_028048_CB_COLOR2_BASE                      0x028048
-#define R_02804C_CB_COLOR3_BASE                      0x02804C
-#define R_028050_CB_COLOR4_BASE                      0x028050
-#define R_028054_CB_COLOR5_BASE                      0x028054
-#define R_028058_CB_COLOR6_BASE                      0x028058
-#define R_02805C_CB_COLOR7_BASE                      0x02805C
-#define R_028240_PA_SC_GENERIC_SCISSOR_TL            0x028240
-#define   S_028240_TL_X(x)                             (((x) & 0x3FFF) << 0)
-#define   G_028240_TL_X(x)                             (((x) >> 0) & 0x3FFF)
-#define   C_028240_TL_X                                0xFFFFC000
-#define   S_028240_TL_Y(x)                             (((x) & 0x3FFF) << 16)
-#define   G_028240_TL_Y(x)                             (((x) >> 16) & 0x3FFF)
-#define   C_028240_TL_Y                                0xC000FFFF
-#define R_028C04_PA_SC_AA_CONFIG                     0x028C04
-#define   S_028C04_MSAA_NUM_SAMPLES(x)                 (((x) & 0x3) << 0)
-#define   G_028C04_MSAA_NUM_SAMPLES(x)                 (((x) >> 0) & 0x3)
-#define   C_028C04_MSAA_NUM_SAMPLES                    0xFFFFFFFC
-#define   S_028C04_AA_MASK_CENTROID_DTMN(x)            (((x) & 0x1) << 4)
-#define   G_028C04_AA_MASK_CENTROID_DTMN(x)            (((x) >> 4) & 0x1)
-#define   C_028C04_AA_MASK_CENTROID_DTMN               0xFFFFFFEF
-#define   S_028C04_MAX_SAMPLE_DIST(x)                  (((x) & 0xF) << 13)
-#define   G_028C04_MAX_SAMPLE_DIST(x)                  (((x) >> 13) & 0xF)
-#define   C_028C04_MAX_SAMPLE_DIST                     0xFFFE1FFF
-#define R_0288CC_SQ_PGM_CF_OFFSET_PS                 0x0288CC
-#define R_0288DC_SQ_PGM_CF_OFFSET_FS                 0x0288DC
-#define R_0288D0_SQ_PGM_CF_OFFSET_VS                 0x0288D0
-#define R_028840_SQ_PGM_START_PS                     0x028840
-#define R_028894_SQ_PGM_START_FS                     0x028894
-#define R_028858_SQ_PGM_START_VS                     0x028858
-#define R_028080_CB_COLOR0_VIEW                      0x028080
-#define   S_028080_SLICE_START(x)                      (((x) & 0x7FF) << 0)
-#define   G_028080_SLICE_START(x)                      (((x) >> 0) & 0x7FF)
-#define   C_028080_SLICE_START                         0xFFFFF800
-#define   S_028080_SLICE_MAX(x)                        (((x) & 0x7FF) << 13)
-#define   G_028080_SLICE_MAX(x)                        (((x) >> 13) & 0x7FF)
-#define   C_028080_SLICE_MAX                           0xFF001FFF
-#define R_028084_CB_COLOR1_VIEW                      0x028084
-#define R_028088_CB_COLOR2_VIEW                      0x028088
-#define R_02808C_CB_COLOR3_VIEW                      0x02808C
-#define R_028090_CB_COLOR4_VIEW                      0x028090
-#define R_028094_CB_COLOR5_VIEW                      0x028094
-#define R_028098_CB_COLOR6_VIEW                      0x028098
-#define R_02809C_CB_COLOR7_VIEW                      0x02809C
-#define R_028100_CB_COLOR0_MASK                      0x028100
-#define   S_028100_CMASK_BLOCK_MAX(x)                  (((x) & 0xFFF) << 0)
-#define   G_028100_CMASK_BLOCK_MAX(x)                  (((x) >> 0) & 0xFFF)
-#define   C_028100_CMASK_BLOCK_MAX                     0xFFFFF000
-#define   S_028100_FMASK_TILE_MAX(x)                   (((x) & 0xFFFFF) << 12)
-#define   G_028100_FMASK_TILE_MAX(x)                   (((x) >> 12) & 0xFFFFF)
-#define   C_028100_FMASK_TILE_MAX                      0x00000FFF
-#define R_028104_CB_COLOR1_MASK                      0x028104
-#define R_028108_CB_COLOR2_MASK                      0x028108
-#define R_02810C_CB_COLOR3_MASK                      0x02810C
-#define R_028110_CB_COLOR4_MASK                      0x028110
-#define R_028114_CB_COLOR5_MASK                      0x028114
-#define R_028118_CB_COLOR6_MASK                      0x028118
-#define R_02811C_CB_COLOR7_MASK                      0x02811C
-#define R_028040_CB_COLOR0_BASE                      0x028040
-#define   S_028040_BASE_256B(x)                        (((x) & 0xFFFFFFFF) << 0)
-#define   G_028040_BASE_256B(x)                        (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028040_BASE_256B                           0x00000000
-#define R_0280E0_CB_COLOR0_FRAG                      0x0280E0
-#define   S_0280E0_BASE_256B(x)                        (((x) & 0xFFFFFFFF) << 0)
-#define   G_0280E0_BASE_256B(x)                        (((x) >> 0) & 0xFFFFFFFF)
-#define   C_0280E0_BASE_256B                           0x00000000
-#define R_0280E4_CB_COLOR1_FRAG                      0x0280E4
-#define R_0280E8_CB_COLOR2_FRAG                      0x0280E8
-#define R_0280EC_CB_COLOR3_FRAG                      0x0280EC
-#define R_0280F0_CB_COLOR4_FRAG                      0x0280F0
-#define R_0280F4_CB_COLOR5_FRAG                      0x0280F4
-#define R_0280F8_CB_COLOR6_FRAG                      0x0280F8
-#define R_0280FC_CB_COLOR7_FRAG                      0x0280FC
-#define R_0280C0_CB_COLOR0_TILE                      0x0280C0
-#define   S_0280C0_BASE_256B(x)                        (((x) & 0xFFFFFFFF) << 0)
-#define   G_0280C0_BASE_256B(x)                        (((x) >> 0) & 0xFFFFFFFF)
-#define   C_0280C0_BASE_256B                           0x00000000
-#define R_0280C4_CB_COLOR1_TILE                      0x0280C4
-#define R_0280C8_CB_COLOR2_TILE                      0x0280C8
-#define R_0280CC_CB_COLOR3_TILE                      0x0280CC
-#define R_0280D0_CB_COLOR4_TILE                      0x0280D0
-#define R_0280D4_CB_COLOR5_TILE                      0x0280D4
-#define R_0280D8_CB_COLOR6_TILE                      0x0280D8
-#define R_0280DC_CB_COLOR7_TILE                      0x0280DC
-#define R_028808_CB_COLOR_CONTROL                    0x028808
-#define   S_028808_FOG_ENABLE(x)                       (((x) & 0x1) << 0)
-#define   G_028808_FOG_ENABLE(x)                       (((x) >> 0) & 0x1)
-#define   C_028808_FOG_ENABLE                          0xFFFFFFFE
-#define   S_028808_MULTIWRITE_ENABLE(x)                (((x) & 0x1) << 1)
-#define   G_028808_MULTIWRITE_ENABLE(x)                (((x) >> 1) & 0x1)
-#define   C_028808_MULTIWRITE_ENABLE                   0xFFFFFFFD
-#define   S_028808_DITHER_ENABLE(x)                    (((x) & 0x1) << 2)
-#define   G_028808_DITHER_ENABLE(x)                    (((x) >> 2) & 0x1)
-#define   C_028808_DITHER_ENABLE                       0xFFFFFFFB
-#define   S_028808_DEGAMMA_ENABLE(x)                   (((x) & 0x1) << 3)
-#define   G_028808_DEGAMMA_ENABLE(x)                   (((x) >> 3) & 0x1)
-#define   C_028808_DEGAMMA_ENABLE                      0xFFFFFFF7
-#define   S_028808_SPECIAL_OP(x)                       (((x) & 0x7) << 4)
-#define   G_028808_SPECIAL_OP(x)                       (((x) >> 4) & 0x7)
-#define   C_028808_SPECIAL_OP                          0xFFFFFF8F
-#define   S_028808_PER_MRT_BLEND(x)                    (((x) & 0x1) << 7)
-#define   G_028808_PER_MRT_BLEND(x)                    (((x) >> 7) & 0x1)
-#define   C_028808_PER_MRT_BLEND                       0xFFFFFF7F
-#define   S_028808_TARGET_BLEND_ENABLE(x)              (((x) & 0xFF) << 8)
-#define   G_028808_TARGET_BLEND_ENABLE(x)              (((x) >> 8) & 0xFF)
-#define   C_028808_TARGET_BLEND_ENABLE                 0xFFFF00FF
-#define   S_028808_ROP3(x)                             (((x) & 0xFF) << 16)
-#define   G_028808_ROP3(x)                             (((x) >> 16) & 0xFF)
-#define   C_028808_ROP3                                0xFF00FFFF
-#define R_028614_SPI_VS_OUT_ID_0                     0x028614
-#define   S_028614_SEMANTIC_0(x)                       (((x) & 0xFF) << 0)
-#define   G_028614_SEMANTIC_0(x)                       (((x) >> 0) & 0xFF)
-#define   C_028614_SEMANTIC_0                          0xFFFFFF00
-#define   S_028614_SEMANTIC_1(x)                       (((x) & 0xFF) << 8)
-#define   G_028614_SEMANTIC_1(x)                       (((x) >> 8) & 0xFF)
-#define   C_028614_SEMANTIC_1                          0xFFFF00FF
-#define   S_028614_SEMANTIC_2(x)                       (((x) & 0xFF) << 16)
-#define   G_028614_SEMANTIC_2(x)                       (((x) >> 16) & 0xFF)
-#define   C_028614_SEMANTIC_2                          0xFF00FFFF
-#define   S_028614_SEMANTIC_3(x)                       (((x) & 0xFF) << 24)
-#define   G_028614_SEMANTIC_3(x)                       (((x) >> 24) & 0xFF)
-#define   C_028614_SEMANTIC_3                          0x00FFFFFF
-#define R_028618_SPI_VS_OUT_ID_1                     0x028618
-#define R_02861C_SPI_VS_OUT_ID_2                     0x02861C
-#define R_028620_SPI_VS_OUT_ID_3                     0x028620
-#define R_028624_SPI_VS_OUT_ID_4                     0x028624
-#define R_028628_SPI_VS_OUT_ID_5                     0x028628
-#define R_02862C_SPI_VS_OUT_ID_6                     0x02862C
-#define R_028630_SPI_VS_OUT_ID_7                     0x028630
-#define R_028634_SPI_VS_OUT_ID_8                     0x028634
-#define R_028638_SPI_VS_OUT_ID_9                     0x028638
-#define R_038000_SQ_TEX_RESOURCE_WORD0_0             0x038000
-#define   S_038000_DIM(x)                              (((x) & 0x7) << 0)
-#define   G_038000_DIM(x)                              (((x) >> 0) & 0x7)
-#define   C_038000_DIM                                 0xFFFFFFF8
-#define   S_038000_TILE_MODE(x)                        (((x) & 0xF) << 3)
-#define   G_038000_TILE_MODE(x)                        (((x) >> 3) & 0xF)
-#define   C_038000_TILE_MODE                           0xFFFFFF87
-#define   S_038000_TILE_TYPE(x)                        (((x) & 0x1) << 7)
-#define   G_038000_TILE_TYPE(x)                        (((x) >> 7) & 0x1)
-#define   C_038000_TILE_TYPE                           0xFFFFFF7F
-#define   S_038000_PITCH(x)                            (((x) & 0x7FF) << 8)
-#define   G_038000_PITCH(x)                            (((x) >> 8) & 0x7FF)
-#define   C_038000_PITCH                               0xFFF800FF
-#define   S_038000_TEX_WIDTH(x)                        (((x) & 0x1FFF) << 19)
-#define   G_038000_TEX_WIDTH(x)                        (((x) >> 19) & 0x1FFF)
-#define   C_038000_TEX_WIDTH                           0x0007FFFF
-#define R_038004_SQ_TEX_RESOURCE_WORD1_0             0x038004
-#define   S_038004_TEX_HEIGHT(x)                       (((x) & 0x1FFF) << 0)
-#define   G_038004_TEX_HEIGHT(x)                       (((x) >> 0) & 0x1FFF)
-#define   C_038004_TEX_HEIGHT                          0xFFFFE000
-#define   S_038004_TEX_DEPTH(x)                        (((x) & 0x1FFF) << 13)
-#define   G_038004_TEX_DEPTH(x)                        (((x) >> 13) & 0x1FFF)
-#define   C_038004_TEX_DEPTH                           0xFC001FFF
-#define   S_038004_DATA_FORMAT(x)                      (((x) & 0x3F) << 26)
-#define   G_038004_DATA_FORMAT(x)                      (((x) >> 26) & 0x3F)
-#define   C_038004_DATA_FORMAT                         0x03FFFFFF
-#define     V_038004_COLOR_INVALID                     0x00000000
-#define     V_038004_COLOR_8                           0x00000001
-#define     V_038004_COLOR_4_4                         0x00000002
-#define     V_038004_COLOR_3_3_2                       0x00000003
-#define     V_038004_COLOR_16                          0x00000005
-#define     V_038004_COLOR_16_FLOAT                    0x00000006
-#define     V_038004_COLOR_8_8                         0x00000007
-#define     V_038004_COLOR_5_6_5                       0x00000008
-#define     V_038004_COLOR_6_5_5                       0x00000009
-#define     V_038004_COLOR_1_5_5_5                     0x0000000A
-#define     V_038004_COLOR_4_4_4_4                     0x0000000B
-#define     V_038004_COLOR_5_5_5_1                     0x0000000C
-#define     V_038004_COLOR_32                          0x0000000D
-#define     V_038004_COLOR_32_FLOAT                    0x0000000E
-#define     V_038004_COLOR_16_16                       0x0000000F
-#define     V_038004_COLOR_16_16_FLOAT                 0x00000010
-#define     V_038004_COLOR_8_24                        0x00000011
-#define     V_038004_COLOR_8_24_FLOAT                  0x00000012
-#define     V_038004_COLOR_24_8                        0x00000013
-#define     V_038004_COLOR_24_8_FLOAT                  0x00000014
-#define     V_038004_COLOR_10_11_11                    0x00000015
-#define     V_038004_COLOR_10_11_11_FLOAT              0x00000016
-#define     V_038004_COLOR_11_11_10                    0x00000017
-#define     V_038004_COLOR_11_11_10_FLOAT              0x00000018
-#define     V_038004_COLOR_2_10_10_10                  0x00000019
-#define     V_038004_COLOR_8_8_8_8                     0x0000001A
-#define     V_038004_COLOR_10_10_10_2                  0x0000001B
-#define     V_038004_COLOR_X24_8_32_FLOAT              0x0000001C
-#define     V_038004_COLOR_32_32                       0x0000001D
-#define     V_038004_COLOR_32_32_FLOAT                 0x0000001E
-#define     V_038004_COLOR_16_16_16_16                 0x0000001F
-#define     V_038004_COLOR_16_16_16_16_FLOAT           0x00000020
-#define     V_038004_COLOR_32_32_32_32                 0x00000022
-#define     V_038004_COLOR_32_32_32_32_FLOAT           0x00000023
-#define R_038008_SQ_TEX_RESOURCE_WORD2_0             0x038008
-#define   S_038008_BASE_ADDRESS(x)                     (((x) & 0xFFFFFFFF) << 0)
-#define   G_038008_BASE_ADDRESS(x)                     (((x) >> 0) & 0xFFFFFFFF)
-#define   C_038008_BASE_ADDRESS                        0x00000000
-#define R_03800C_SQ_TEX_RESOURCE_WORD3_0             0x03800C
-#define   S_03800C_MIP_ADDRESS(x)                      (((x) & 0xFFFFFFFF) << 0)
-#define   G_03800C_MIP_ADDRESS(x)                      (((x) >> 0) & 0xFFFFFFFF)
-#define   C_03800C_MIP_ADDRESS                         0x00000000
-#define R_038010_SQ_TEX_RESOURCE_WORD4_0             0x038010
-#define   S_038010_FORMAT_COMP_X(x)                    (((x) & 0x3) << 0)
-#define   G_038010_FORMAT_COMP_X(x)                    (((x) >> 0) & 0x3)
-#define   C_038010_FORMAT_COMP_X                       0xFFFFFFFC
-#define   S_038010_FORMAT_COMP_Y(x)                    (((x) & 0x3) << 2)
-#define   G_038010_FORMAT_COMP_Y(x)                    (((x) >> 2) & 0x3)
-#define   C_038010_FORMAT_COMP_Y                       0xFFFFFFF3
-#define   S_038010_FORMAT_COMP_Z(x)                    (((x) & 0x3) << 4)
-#define   G_038010_FORMAT_COMP_Z(x)                    (((x) >> 4) & 0x3)
-#define   C_038010_FORMAT_COMP_Z                       0xFFFFFFCF
-#define   S_038010_FORMAT_COMP_W(x)                    (((x) & 0x3) << 6)
-#define   G_038010_FORMAT_COMP_W(x)                    (((x) >> 6) & 0x3)
-#define   C_038010_FORMAT_COMP_W                       0xFFFFFF3F
-#define   S_038010_NUM_FORMAT_ALL(x)                   (((x) & 0x3) << 8)
-#define   G_038010_NUM_FORMAT_ALL(x)                   (((x) >> 8) & 0x3)
-#define   C_038010_NUM_FORMAT_ALL                      0xFFFFFCFF
-#define   S_038010_SRF_MODE_ALL(x)                     (((x) & 0x1) << 10)
-#define   G_038010_SRF_MODE_ALL(x)                     (((x) >> 10) & 0x1)
-#define   C_038010_SRF_MODE_ALL                        0xFFFFFBFF
-#define   S_038010_FORCE_DEGAMMA(x)                    (((x) & 0x1) << 11)
-#define   G_038010_FORCE_DEGAMMA(x)                    (((x) >> 11) & 0x1)
-#define   C_038010_FORCE_DEGAMMA                       0xFFFFF7FF
-#define   S_038010_ENDIAN_SWAP(x)                      (((x) & 0x3) << 12)
-#define   G_038010_ENDIAN_SWAP(x)                      (((x) >> 12) & 0x3)
-#define   C_038010_ENDIAN_SWAP                         0xFFFFCFFF
-#define   S_038010_REQUEST_SIZE(x)                     (((x) & 0x3) << 14)
-#define   G_038010_REQUEST_SIZE(x)                     (((x) >> 14) & 0x3)
-#define   C_038010_REQUEST_SIZE                        0xFFFF3FFF
-#define   S_038010_DST_SEL_X(x)                        (((x) & 0x7) << 16)
-#define   G_038010_DST_SEL_X(x)                        (((x) >> 16) & 0x7)
-#define   C_038010_DST_SEL_X                           0xFFF8FFFF
-#define   S_038010_DST_SEL_Y(x)                        (((x) & 0x7) << 19)
-#define   G_038010_DST_SEL_Y(x)                        (((x) >> 19) & 0x7)
-#define   C_038010_DST_SEL_Y                           0xFFC7FFFF
-#define   S_038010_DST_SEL_Z(x)                        (((x) & 0x7) << 22)
-#define   G_038010_DST_SEL_Z(x)                        (((x) >> 22) & 0x7)
-#define   C_038010_DST_SEL_Z                           0xFE3FFFFF
-#define   S_038010_DST_SEL_W(x)                        (((x) & 0x7) << 25)
-#define   G_038010_DST_SEL_W(x)                        (((x) >> 25) & 0x7)
-#define   C_038010_DST_SEL_W                           0xF1FFFFFF
-#define   S_038010_BASE_LEVEL(x)                       (((x) & 0xF) << 28)
-#define   G_038010_BASE_LEVEL(x)                       (((x) >> 28) & 0xF)
-#define   C_038010_BASE_LEVEL                          0x0FFFFFFF
-#define R_038014_SQ_TEX_RESOURCE_WORD5_0             0x038014
-#define   S_038014_LAST_LEVEL(x)                       (((x) & 0xF) << 0)
-#define   G_038014_LAST_LEVEL(x)                       (((x) >> 0) & 0xF)
-#define   C_038014_LAST_LEVEL                          0xFFFFFFF0
-#define   S_038014_BASE_ARRAY(x)                       (((x) & 0x1FFF) << 4)
-#define   G_038014_BASE_ARRAY(x)                       (((x) >> 4) & 0x1FFF)
-#define   C_038014_BASE_ARRAY                          0xFFFE000F
-#define   S_038014_LAST_ARRAY(x)                       (((x) & 0x1FFF) << 17)
-#define   G_038014_LAST_ARRAY(x)                       (((x) >> 17) & 0x1FFF)
-#define   C_038014_LAST_ARRAY                          0xC001FFFF
-#define R_038018_SQ_TEX_RESOURCE_WORD6_0             0x038018
-#define   S_038018_MPEG_CLAMP(x)                       (((x) & 0x3) << 0)
-#define   G_038018_MPEG_CLAMP(x)                       (((x) >> 0) & 0x3)
-#define   C_038018_MPEG_CLAMP                          0xFFFFFFFC
-#define   S_038018_PERF_MODULATION(x)                  (((x) & 0x7) << 5)
-#define   G_038018_PERF_MODULATION(x)                  (((x) >> 5) & 0x7)
-#define   C_038018_PERF_MODULATION                     0xFFFFFF1F
-#define   S_038018_INTERLACED(x)                       (((x) & 0x1) << 8)
-#define   G_038018_INTERLACED(x)                       (((x) >> 8) & 0x1)
-#define   C_038018_INTERLACED                          0xFFFFFEFF
-#define   S_038018_TYPE(x)                             (((x) & 0x3) << 30)
-#define   G_038018_TYPE(x)                             (((x) >> 30) & 0x3)
-#define   C_038018_TYPE                                0x3FFFFFFF
-#define R_008040_WAIT_UNTIL                          0x008040
-#define   S_008040_WAIT_CP_DMA_IDLE(x)                 (((x) & 0x1) << 8)
-#define   G_008040_WAIT_CP_DMA_IDLE(x)                 (((x) >> 8) & 0x1)
-#define   C_008040_WAIT_CP_DMA_IDLE                    0xFFFFFEFF
-#define   S_008040_WAIT_CMDFIFO(x)                     (((x) & 0x1) << 10)
-#define   G_008040_WAIT_CMDFIFO(x)                     (((x) >> 10) & 0x1)
-#define   C_008040_WAIT_CMDFIFO                        0xFFFFFBFF
-#define   S_008040_WAIT_2D_IDLE(x)                     (((x) & 0x1) << 14)
-#define   G_008040_WAIT_2D_IDLE(x)                     (((x) >> 14) & 0x1)
-#define   C_008040_WAIT_2D_IDLE                        0xFFFFBFFF
-#define   S_008040_WAIT_3D_IDLE(x)                     (((x) & 0x1) << 15)
-#define   G_008040_WAIT_3D_IDLE(x)                     (((x) >> 15) & 0x1)
-#define   C_008040_WAIT_3D_IDLE                        0xFFFF7FFF
-#define   S_008040_WAIT_2D_IDLECLEAN(x)                (((x) & 0x1) << 16)
-#define   G_008040_WAIT_2D_IDLECLEAN(x)                (((x) >> 16) & 0x1)
-#define   C_008040_WAIT_2D_IDLECLEAN                   0xFFFEFFFF
-#define   S_008040_WAIT_3D_IDLECLEAN(x)                (((x) & 0x1) << 17)
-#define   G_008040_WAIT_3D_IDLECLEAN(x)                (((x) >> 17) & 0x1)
-#define   C_008040_WAIT_3D_IDLECLEAN                   0xFFFDFFFF
-#define   S_008040_WAIT_EXTERN_SIG(x)                  (((x) & 0x1) << 19)
-#define   G_008040_WAIT_EXTERN_SIG(x)                  (((x) >> 19) & 0x1)
-#define   C_008040_WAIT_EXTERN_SIG                     0xFFF7FFFF
-#define   S_008040_CMDFIFO_ENTRIES(x)                  (((x) & 0x1F) << 20)
-#define   G_008040_CMDFIFO_ENTRIES(x)                  (((x) >> 20) & 0x1F)
-#define   C_008040_CMDFIFO_ENTRIES                     0xFE0FFFFF
-#define R_008958_VGT_PRIMITIVE_TYPE                  0x008958
-#define   S_008958_PRIM_TYPE(x)                        (((x) & 0x3F) << 0)
-#define   G_008958_PRIM_TYPE(x)                        (((x) >> 0) & 0x3F)
-#define   C_008958_PRIM_TYPE                           0xFFFFFFC0
-#define R_008C00_SQ_CONFIG                           0x008C00
-#define   S_008C00_VC_ENABLE(x)                        (((x) & 0x1) << 0)
-#define   G_008C00_VC_ENABLE(x)                        (((x) >> 0) & 0x1)
-#define   C_008C00_VC_ENABLE                           0xFFFFFFFE
-#define   S_008C00_EXPORT_SRC_C(x)                     (((x) & 0x1) << 1)
-#define   G_008C00_EXPORT_SRC_C(x)                     (((x) >> 1) & 0x1)
-#define   C_008C00_EXPORT_SRC_C                        0xFFFFFFFD
-#define   S_008C00_DX9_CONSTS(x)                       (((x) & 0x1) << 2)
-#define   G_008C00_DX9_CONSTS(x)                       (((x) >> 2) & 0x1)
-#define   C_008C00_DX9_CONSTS                          0xFFFFFFFB
-#define   S_008C00_ALU_INST_PREFER_VECTOR(x)           (((x) & 0x1) << 3)
-#define   G_008C00_ALU_INST_PREFER_VECTOR(x)           (((x) >> 3) & 0x1)
-#define   C_008C00_ALU_INST_PREFER_VECTOR              0xFFFFFFF7
-#define   S_008C00_DX10_CLAMP(x)                       (((x) & 0x1) << 4)
-#define   G_008C00_DX10_CLAMP(x)                       (((x) >> 4) & 0x1)
-#define   C_008C00_DX10_CLAMP                          0xFFFFFFEF
-#define   S_008C00_ALU_PREFER_ONE_WATERFALL(x)         (((x) & 0x1) << 5)
-#define   G_008C00_ALU_PREFER_ONE_WATERFALL(x)         (((x) >> 5) & 0x1)
-#define   C_008C00_ALU_PREFER_ONE_WATERFALL            0xFFFFFFDF
-#define   S_008C00_ALU_MAX_ONE_WATERFALL(x)            (((x) & 0x1) << 6)
-#define   G_008C00_ALU_MAX_ONE_WATERFALL(x)            (((x) >> 6) & 0x1)
-#define   C_008C00_ALU_MAX_ONE_WATERFALL               0xFFFFFFBF
-#define   S_008C00_CLAUSE_SEQ_PRIO(x)                  (((x) & 0x3) << 8)
-#define   G_008C00_CLAUSE_SEQ_PRIO(x)                  (((x) >> 8) & 0x3)
-#define   C_008C00_CLAUSE_SEQ_PRIO                     0xFFFFFCFF
-#define   S_008C00_PS_PRIO(x)                          (((x) & 0x3) << 24)
-#define   G_008C00_PS_PRIO(x)                          (((x) >> 24) & 0x3)
-#define   C_008C00_PS_PRIO                             0xFCFFFFFF
-#define   S_008C00_VS_PRIO(x)                          (((x) & 0x3) << 26)
-#define   G_008C00_VS_PRIO(x)                          (((x) >> 26) & 0x3)
-#define   C_008C00_VS_PRIO                             0xF3FFFFFF
-#define   S_008C00_GS_PRIO(x)                          (((x) & 0x3) << 28)
-#define   G_008C00_GS_PRIO(x)                          (((x) >> 28) & 0x3)
-#define   C_008C00_GS_PRIO                             0xCFFFFFFF
-#define   S_008C00_ES_PRIO(x)                          (((x) & 0x3) << 30)
-#define   G_008C00_ES_PRIO(x)                          (((x) >> 30) & 0x3)
-#define   C_008C00_ES_PRIO                             0x3FFFFFFF
-#define R_008C04_SQ_GPR_RESOURCE_MGMT_1              0x008C04
-#define   S_008C04_NUM_PS_GPRS(x)                      (((x) & 0xFF) << 0)
-#define   G_008C04_NUM_PS_GPRS(x)                      (((x) >> 0) & 0xFF)
-#define   C_008C04_NUM_PS_GPRS                         0xFFFFFF00
-#define   S_008C04_NUM_VS_GPRS(x)                      (((x) & 0xFF) << 16)
-#define   G_008C04_NUM_VS_GPRS(x)                      (((x) >> 16) & 0xFF)
-#define   C_008C04_NUM_VS_GPRS                         0xFF00FFFF
-#define   S_008C04_NUM_CLAUSE_TEMP_GPRS(x)             (((x) & 0xF) << 28)
-#define   G_008C04_NUM_CLAUSE_TEMP_GPRS(x)             (((x) >> 28) & 0xF)
-#define   C_008C04_NUM_CLAUSE_TEMP_GPRS                0x0FFFFFFF
-#define R_008C08_SQ_GPR_RESOURCE_MGMT_2              0x008C08
-#define   S_008C08_NUM_GS_GPRS(x)                      (((x) & 0xFF) << 0)
-#define   G_008C08_NUM_GS_GPRS(x)                      (((x) >> 0) & 0xFF)
-#define   C_008C08_NUM_GS_GPRS                         0xFFFFFF00
-#define   S_008C08_NUM_ES_GPRS(x)                      (((x) & 0xFF) << 16)
-#define   G_008C08_NUM_ES_GPRS(x)                      (((x) >> 16) & 0xFF)
-#define   C_008C08_NUM_ES_GPRS                         0xFF00FFFF
-#define R_008C0C_SQ_THREAD_RESOURCE_MGMT             0x008C0C
-#define   S_008C0C_NUM_PS_THREADS(x)                   (((x) & 0xFF) << 0)
-#define   G_008C0C_NUM_PS_THREADS(x)                   (((x) >> 0) & 0xFF)
-#define   C_008C0C_NUM_PS_THREADS                      0xFFFFFF00
-#define   S_008C0C_NUM_VS_THREADS(x)                   (((x) & 0xFF) << 8)
-#define   G_008C0C_NUM_VS_THREADS(x)                   (((x) >> 8) & 0xFF)
-#define   C_008C0C_NUM_VS_THREADS                      0xFFFF00FF
-#define   S_008C0C_NUM_GS_THREADS(x)                   (((x) & 0xFF) << 16)
-#define   G_008C0C_NUM_GS_THREADS(x)                   (((x) >> 16) & 0xFF)
-#define   C_008C0C_NUM_GS_THREADS                      0xFF00FFFF
-#define   S_008C0C_NUM_ES_THREADS(x)                   (((x) & 0xFF) << 24)
-#define   G_008C0C_NUM_ES_THREADS(x)                   (((x) >> 24) & 0xFF)
-#define   C_008C0C_NUM_ES_THREADS                      0x00FFFFFF
-#define R_008C10_SQ_STACK_RESOURCE_MGMT_1            0x008C10
-#define   S_008C10_NUM_PS_STACK_ENTRIES(x)             (((x) & 0xFFF) << 0)
-#define   G_008C10_NUM_PS_STACK_ENTRIES(x)             (((x) >> 0) & 0xFFF)
-#define   C_008C10_NUM_PS_STACK_ENTRIES                0xFFFFF000
-#define   S_008C10_NUM_VS_STACK_ENTRIES(x)             (((x) & 0xFFF) << 16)
-#define   G_008C10_NUM_VS_STACK_ENTRIES(x)             (((x) >> 16) & 0xFFF)
-#define   C_008C10_NUM_VS_STACK_ENTRIES                0xF000FFFF
-#define R_008C14_SQ_STACK_RESOURCE_MGMT_2            0x008C14
-#define   S_008C14_NUM_GS_STACK_ENTRIES(x)             (((x) & 0xFFF) << 0)
-#define   G_008C14_NUM_GS_STACK_ENTRIES(x)             (((x) >> 0) & 0xFFF)
-#define   C_008C14_NUM_GS_STACK_ENTRIES                0xFFFFF000
-#define   S_008C14_NUM_ES_STACK_ENTRIES(x)             (((x) & 0xFFF) << 16)
-#define   G_008C14_NUM_ES_STACK_ENTRIES(x)             (((x) >> 16) & 0xFFF)
-#define   C_008C14_NUM_ES_STACK_ENTRIES                0xF000FFFF
-#define R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ        0x008D8C
-#define   S_008D8C_RING0_OFFSET(x)                     (((x) & 0xFF) << 0)
-#define   G_008D8C_RING0_OFFSET(x)                     (((x) >> 0) & 0xFF)
-#define   C_008D8C_RING0_OFFSET                        0xFFFFFF00
-#define   S_008D8C_ISOLATE_ES_ENABLE(x)                (((x) & 0x1) << 12)
-#define   G_008D8C_ISOLATE_ES_ENABLE(x)                (((x) >> 12) & 0x1)
-#define   C_008D8C_ISOLATE_ES_ENABLE                   0xFFFFEFFF
-#define   S_008D8C_ISOLATE_GS_ENABLE(x)                (((x) & 0x1) << 13)
-#define   G_008D8C_ISOLATE_GS_ENABLE(x)                (((x) >> 13) & 0x1)
-#define   C_008D8C_ISOLATE_GS_ENABLE                   0xFFFFDFFF
-#define   S_008D8C_VS_PC_LIMIT_ENABLE(x)               (((x) & 0x1) << 14)
-#define   G_008D8C_VS_PC_LIMIT_ENABLE(x)               (((x) >> 14) & 0x1)
-#define   C_008D8C_VS_PC_LIMIT_ENABLE                  0xFFFFBFFF
-#define R_009508_TA_CNTL_AUX                         0x009508
-#define   S_009508_DISABLE_CUBE_WRAP(x)                (((x) & 0x1) << 0)
-#define   G_009508_DISABLE_CUBE_WRAP(x)                (((x) >> 0) & 0x1)
-#define   C_009508_DISABLE_CUBE_WRAP                   0xFFFFFFFE
-#define   S_009508_SYNC_GRADIENT(x)                    (((x) & 0x1) << 24)
-#define   G_009508_SYNC_GRADIENT(x)                    (((x) >> 24) & 0x1)
-#define   C_009508_SYNC_GRADIENT                       0xFEFFFFFF
-#define   S_009508_SYNC_WALKER(x)                      (((x) & 0x1) << 25)
-#define   G_009508_SYNC_WALKER(x)                      (((x) >> 25) & 0x1)
-#define   C_009508_SYNC_WALKER                         0xFDFFFFFF
-#define   S_009508_SYNC_ALIGNER(x)                     (((x) & 0x1) << 26)
-#define   G_009508_SYNC_ALIGNER(x)                     (((x) >> 26) & 0x1)
-#define   C_009508_SYNC_ALIGNER                        0xFBFFFFFF
-#define   S_009508_BILINEAR_PRECISION(x)               (((x) & 0x1) << 31)
-#define   G_009508_BILINEAR_PRECISION(x)               (((x) >> 31) & 0x1)
-#define   C_009508_BILINEAR_PRECISION                  0x7FFFFFFF
-#define R_009714_VC_ENHANCE                          0x009714
-#define R_009830_DB_DEBUG                            0x009830
-#define R_009838_DB_WATERMARKS                       0x009838
-#define   S_009838_DEPTH_FREE(x)                       (((x) & 0x1F) << 0)
-#define   G_009838_DEPTH_FREE(x)                       (((x) >> 0) & 0x1F)
-#define   C_009838_DEPTH_FREE                          0xFFFFFFE0
-#define   S_009838_DEPTH_FLUSH(x)                      (((x) & 0x3F) << 5)
-#define   G_009838_DEPTH_FLUSH(x)                      (((x) >> 5) & 0x3F)
-#define   C_009838_DEPTH_FLUSH                         0xFFFFF81F
-#define   S_009838_FORCE_SUMMARIZE(x)                  (((x) & 0xF) << 11)
-#define   G_009838_FORCE_SUMMARIZE(x)                  (((x) >> 11) & 0xF)
-#define   C_009838_FORCE_SUMMARIZE                     0xFFFF87FF
-#define   S_009838_DEPTH_PENDING_FREE(x)               (((x) & 0x1F) << 15)
-#define   G_009838_DEPTH_PENDING_FREE(x)               (((x) >> 15) & 0x1F)
-#define   C_009838_DEPTH_PENDING_FREE                  0xFFF07FFF
-#define   S_009838_DEPTH_CACHELINE_FREE(x)             (((x) & 0x1F) << 20)
-#define   G_009838_DEPTH_CACHELINE_FREE(x)             (((x) >> 20) & 0x1F)
-#define   C_009838_DEPTH_CACHELINE_FREE                0xFE0FFFFF
-#define   S_009838_EARLY_Z_PANIC_DISABLE(x)            (((x) & 0x1) << 25)
-#define   G_009838_EARLY_Z_PANIC_DISABLE(x)            (((x) >> 25) & 0x1)
-#define   C_009838_EARLY_Z_PANIC_DISABLE               0xFDFFFFFF
-#define   S_009838_LATE_Z_PANIC_DISABLE(x)             (((x) & 0x1) << 26)
-#define   G_009838_LATE_Z_PANIC_DISABLE(x)             (((x) >> 26) & 0x1)
-#define   C_009838_LATE_Z_PANIC_DISABLE                0xFBFFFFFF
-#define   S_009838_RE_Z_PANIC_DISABLE(x)               (((x) & 0x1) << 27)
-#define   G_009838_RE_Z_PANIC_DISABLE(x)               (((x) >> 27) & 0x1)
-#define   C_009838_RE_Z_PANIC_DISABLE                  0xF7FFFFFF
-#define   S_009838_DB_EXTRA_DEBUG(x)                   (((x) & 0xF) << 28)
-#define   G_009838_DB_EXTRA_DEBUG(x)                   (((x) >> 28) & 0xF)
-#define   C_009838_DB_EXTRA_DEBUG                      0x0FFFFFFF
-#define R_028030_PA_SC_SCREEN_SCISSOR_TL             0x028030
-#define   S_028030_TL_X(x)                             (((x) & 0x7FFF) << 0)
-#define   G_028030_TL_X(x)                             (((x) >> 0) & 0x7FFF)
-#define   C_028030_TL_X                                0xFFFF8000
-#define   S_028030_TL_Y(x)                             (((x) & 0x7FFF) << 16)
-#define   G_028030_TL_Y(x)                             (((x) >> 16) & 0x7FFF)
-#define   C_028030_TL_Y                                0x8000FFFF
-#define R_028034_PA_SC_SCREEN_SCISSOR_BR             0x028034
-#define   S_028034_BR_X(x)                             (((x) & 0x7FFF) << 0)
-#define   G_028034_BR_X(x)                             (((x) >> 0) & 0x7FFF)
-#define   C_028034_BR_X                                0xFFFF8000
-#define   S_028034_BR_Y(x)                             (((x) & 0x7FFF) << 16)
-#define   G_028034_BR_Y(x)                             (((x) >> 16) & 0x7FFF)
-#define   C_028034_BR_Y                                0x8000FFFF
-#define R_028200_PA_SC_WINDOW_OFFSET                 0x028200
-#define   S_028200_WINDOW_X_OFFSET(x)                  (((x) & 0x7FFF) << 0)
-#define   G_028200_WINDOW_X_OFFSET(x)                  (((x) >> 0) & 0x7FFF)
-#define   C_028200_WINDOW_X_OFFSET                     0xFFFF8000
-#define   S_028200_WINDOW_Y_OFFSET(x)                  (((x) & 0x7FFF) << 16)
-#define   G_028200_WINDOW_Y_OFFSET(x)                  (((x) >> 16) & 0x7FFF)
-#define   C_028200_WINDOW_Y_OFFSET                     0x8000FFFF
-#define R_028204_PA_SC_WINDOW_SCISSOR_TL             0x028204
-#define   S_028204_TL_X(x)                             (((x) & 0x3FFF) << 0)
-#define   G_028204_TL_X(x)                             (((x) >> 0) & 0x3FFF)
-#define   C_028204_TL_X                                0xFFFFC000
-#define   S_028204_TL_Y(x)                             (((x) & 0x3FFF) << 16)
-#define   G_028204_TL_Y(x)                             (((x) >> 16) & 0x3FFF)
-#define   C_028204_TL_Y                                0xC000FFFF
-#define   S_028204_WINDOW_OFFSET_DISABLE(x)            (((x) & 0x1) << 31)
-#define   G_028204_WINDOW_OFFSET_DISABLE(x)            (((x) >> 31) & 0x1)
-#define   C_028204_WINDOW_OFFSET_DISABLE               0x7FFFFFFF
-#define R_028208_PA_SC_WINDOW_SCISSOR_BR             0x028208
-#define   S_028208_BR_X(x)                             (((x) & 0x3FFF) << 0)
-#define   G_028208_BR_X(x)                             (((x) >> 0) & 0x3FFF)
-#define   C_028208_BR_X                                0xFFFFC000
-#define   S_028208_BR_Y(x)                             (((x) & 0x3FFF) << 16)
-#define   G_028208_BR_Y(x)                             (((x) >> 16) & 0x3FFF)
-#define   C_028208_BR_Y                                0xC000FFFF
-#define R_02820C_PA_SC_CLIPRECT_RULE                 0x02820C
-#define   S_02820C_CLIP_RULE(x)                        (((x) & 0xFFFF) << 0)
-#define   G_02820C_CLIP_RULE(x)                        (((x) >> 0) & 0xFFFF)
-#define   C_02820C_CLIP_RULE                           0xFFFF0000
-#define R_028210_PA_SC_CLIPRECT_0_TL                 0x028210
-#define   S_028210_TL_X(x)                             (((x) & 0x3FFF) << 0)
-#define   G_028210_TL_X(x)                             (((x) >> 0) & 0x3FFF)
-#define   C_028210_TL_X                                0xFFFFC000
-#define   S_028210_TL_Y(x)                             (((x) & 0x3FFF) << 16)
-#define   G_028210_TL_Y(x)                             (((x) >> 16) & 0x3FFF)
-#define   C_028210_TL_Y                                0xC000FFFF
-#define R_028214_PA_SC_CLIPRECT_0_BR                 0x028214
-#define   S_028214_BR_X(x)                             (((x) & 0x3FFF) << 0)
-#define   G_028214_BR_X(x)                             (((x) >> 0) & 0x3FFF)
-#define   C_028214_BR_X                                0xFFFFC000
-#define   S_028214_BR_Y(x)                             (((x) & 0x3FFF) << 16)
-#define   G_028214_BR_Y(x)                             (((x) >> 16) & 0x3FFF)
-#define   C_028214_BR_Y                                0xC000FFFF
-#define R_028218_PA_SC_CLIPRECT_1_TL                 0x028218
-#define R_02821C_PA_SC_CLIPRECT_1_BR                 0x02821C
-#define R_028220_PA_SC_CLIPRECT_2_TL                 0x028220
-#define R_028224_PA_SC_CLIPRECT_2_BR                 0x028224
-#define R_028228_PA_SC_CLIPRECT_3_TL                 0x028228
-#define R_02822C_PA_SC_CLIPRECT_3_BR                 0x02822C
-#define R_028230_PA_SC_EDGERULE                      0x028230
-#define R_028240_PA_SC_GENERIC_SCISSOR_TL            0x028240
-#define   S_028240_TL_X(x)                             (((x) & 0x3FFF) << 0)
-#define   G_028240_TL_X(x)                             (((x) >> 0) & 0x3FFF)
-#define   C_028240_TL_X                                0xFFFFC000
-#define   S_028240_TL_Y(x)                             (((x) & 0x3FFF) << 16)
-#define   G_028240_TL_Y(x)                             (((x) >> 16) & 0x3FFF)
-#define   C_028240_TL_Y                                0xC000FFFF
-#define   S_028240_WINDOW_OFFSET_DISABLE(x)            (((x) & 0x1) << 31)
-#define   G_028240_WINDOW_OFFSET_DISABLE(x)            (((x) >> 31) & 0x1)
-#define   C_028240_WINDOW_OFFSET_DISABLE               0x7FFFFFFF
-#define R_028244_PA_SC_GENERIC_SCISSOR_BR            0x028244
-#define   S_028244_BR_X(x)                             (((x) & 0x3FFF) << 0)
-#define   G_028244_BR_X(x)                             (((x) >> 0) & 0x3FFF)
-#define   C_028244_BR_X                                0xFFFFC000
-#define   S_028244_BR_Y(x)                             (((x) & 0x3FFF) << 16)
-#define   G_028244_BR_Y(x)                             (((x) >> 16) & 0x3FFF)
-#define   C_028244_BR_Y                                0xC000FFFF
-#define R_0282D0_PA_SC_VPORT_ZMIN_0                  0x0282D0
-#define   S_0282D0_VPORT_ZMIN(x)                       (((x) & 0xFFFFFFFF) << 0)
-#define   G_0282D0_VPORT_ZMIN(x)                       (((x) >> 0) & 0xFFFFFFFF)
-#define   C_0282D0_VPORT_ZMIN                          0x00000000
-#define R_0282D4_PA_SC_VPORT_ZMAX_0                  0x0282D4
-#define   S_0282D4_VPORT_ZMAX(x)                       (((x) & 0xFFFFFFFF) << 0)
-#define   G_0282D4_VPORT_ZMAX(x)                       (((x) >> 0) & 0xFFFFFFFF)
-#define   C_0282D4_VPORT_ZMAX                          0x00000000
-#define R_028350_SX_MISC                             0x028350
-#define   S_028350_MULTIPASS(x)                        (((x) & 0x1) << 0)
-#define   G_028350_MULTIPASS(x)                        (((x) >> 0) & 0x1)
-#define   C_028350_MULTIPASS                           0xFFFFFFFE
-#define R_028380_SQ_VTX_SEMANTIC_0                   0x028380
-#define   S_028380_SEMANTIC_ID(x)                      (((x) & 0xFF) << 0)
-#define   G_028380_SEMANTIC_ID(x)                      (((x) >> 0) & 0xFF)
-#define   C_028380_SEMANTIC_ID                         0xFFFFFF00
-#define R_028384_SQ_VTX_SEMANTIC_1                   0x028384
-#define R_028388_SQ_VTX_SEMANTIC_2                   0x028388
-#define R_02838C_SQ_VTX_SEMANTIC_3                   0x02838C
-#define R_028390_SQ_VTX_SEMANTIC_4                   0x028390
-#define R_028394_SQ_VTX_SEMANTIC_5                   0x028394
-#define R_028398_SQ_VTX_SEMANTIC_6                   0x028398
-#define R_02839C_SQ_VTX_SEMANTIC_7                   0x02839C
-#define R_0283A0_SQ_VTX_SEMANTIC_8                   0x0283A0
-#define R_0283A4_SQ_VTX_SEMANTIC_9                   0x0283A4
-#define R_0283A8_SQ_VTX_SEMANTIC_10                  0x0283A8
-#define R_0283AC_SQ_VTX_SEMANTIC_11                  0x0283AC
-#define R_0283B0_SQ_VTX_SEMANTIC_12                  0x0283B0
-#define R_0283B4_SQ_VTX_SEMANTIC_13                  0x0283B4
-#define R_0283B8_SQ_VTX_SEMANTIC_14                  0x0283B8
-#define R_0283BC_SQ_VTX_SEMANTIC_15                  0x0283BC
-#define R_0283C0_SQ_VTX_SEMANTIC_16                  0x0283C0
-#define R_0283C4_SQ_VTX_SEMANTIC_17                  0x0283C4
-#define R_0283C8_SQ_VTX_SEMANTIC_18                  0x0283C8
-#define R_0283CC_SQ_VTX_SEMANTIC_19                  0x0283CC
-#define R_0283D0_SQ_VTX_SEMANTIC_20                  0x0283D0
-#define R_0283D4_SQ_VTX_SEMANTIC_21                  0x0283D4
-#define R_0283D8_SQ_VTX_SEMANTIC_22                  0x0283D8
-#define R_0283DC_SQ_VTX_SEMANTIC_23                  0x0283DC
-#define R_0283E0_SQ_VTX_SEMANTIC_24                  0x0283E0
-#define R_0283E4_SQ_VTX_SEMANTIC_25                  0x0283E4
-#define R_0283E8_SQ_VTX_SEMANTIC_26                  0x0283E8
-#define R_0283EC_SQ_VTX_SEMANTIC_27                  0x0283EC
-#define R_0283F0_SQ_VTX_SEMANTIC_28                  0x0283F0
-#define R_0283F4_SQ_VTX_SEMANTIC_29                  0x0283F4
-#define R_0283F8_SQ_VTX_SEMANTIC_30                  0x0283F8
-#define R_0283FC_SQ_VTX_SEMANTIC_31                  0x0283FC
-#define R_028400_VGT_MAX_VTX_INDX                    0x028400
-#define   S_028400_MAX_INDX(x)                         (((x) & 0xFFFFFFFF) << 0)
-#define   G_028400_MAX_INDX(x)                         (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028400_MAX_INDX                            0x00000000
-#define R_028404_VGT_MIN_VTX_INDX                    0x028404
-#define   S_028404_MIN_INDX(x)                         (((x) & 0xFFFFFFFF) << 0)
-#define   G_028404_MIN_INDX(x)                         (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028404_MIN_INDX                            0x00000000
-#define R_028408_VGT_INDX_OFFSET                     0x028408
-#define   S_028408_INDX_OFFSET(x)                      (((x) & 0xFFFFFFFF) << 0)
-#define   G_028408_INDX_OFFSET(x)                      (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028408_INDX_OFFSET                         0x00000000
-#define R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX        0x02840C
-#define   S_02840C_RESET_INDX(x)                       (((x) & 0xFFFFFFFF) << 0)
-#define   G_02840C_RESET_INDX(x)                       (((x) >> 0) & 0xFFFFFFFF)
-#define   C_02840C_RESET_INDX                          0x00000000
-#define R_028410_SX_ALPHA_TEST_CONTROL               0x028410
-#define   S_028410_ALPHA_FUNC(x)                       (((x) & 0x7) << 0)
-#define   G_028410_ALPHA_FUNC(x)                       (((x) >> 0) & 0x7)
-#define   C_028410_ALPHA_FUNC                          0xFFFFFFF8
-#define   S_028410_ALPHA_TEST_ENABLE(x)                (((x) & 0x1) << 3)
-#define   G_028410_ALPHA_TEST_ENABLE(x)                (((x) >> 3) & 0x1)
-#define   C_028410_ALPHA_TEST_ENABLE                   0xFFFFFFF7
-#define   S_028410_ALPHA_TEST_BYPASS(x)                (((x) & 0x1) << 8)
-#define   G_028410_ALPHA_TEST_BYPASS(x)                (((x) >> 8) & 0x1)
-#define   C_028410_ALPHA_TEST_BYPASS                   0xFFFFFEFF
-#define R_028414_CB_BLEND_RED                        0x028414
-#define   S_028414_BLEND_RED(x)                        (((x) & 0xFFFFFFFF) << 0)
-#define   G_028414_BLEND_RED(x)                        (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028414_BLEND_RED                           0x00000000
-#define R_028418_CB_BLEND_GREEN                      0x028418
-#define   S_028418_BLEND_GREEN(x)                      (((x) & 0xFFFFFFFF) << 0)
-#define   G_028418_BLEND_GREEN(x)                      (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028418_BLEND_GREEN                         0x00000000
-#define R_02841C_CB_BLEND_BLUE                       0x02841C
-#define   S_02841C_BLEND_BLUE(x)                       (((x) & 0xFFFFFFFF) << 0)
-#define   G_02841C_BLEND_BLUE(x)                       (((x) >> 0) & 0xFFFFFFFF)
-#define   C_02841C_BLEND_BLUE                          0x00000000
-#define R_028420_CB_BLEND_ALPHA                      0x028420
-#define   S_028420_BLEND_ALPHA(x)                      (((x) & 0xFFFFFFFF) << 0)
-#define   G_028420_BLEND_ALPHA(x)                      (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028420_BLEND_ALPHA                         0x00000000
-#define R_028438_SX_ALPHA_REF                        0x028438
-#define   S_028438_ALPHA_REF(x)                        (((x) & 0xFFFFFFFF) << 0)
-#define   G_028438_ALPHA_REF(x)                        (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028438_ALPHA_REF                           0x00000000
-#define R_0286C8_SPI_THREAD_GROUPING                 0x0286C8
-#define   S_0286C8_PS_GROUPING(x)                      (((x) & 0x1F) << 0)
-#define   G_0286C8_PS_GROUPING(x)                      (((x) >> 0) & 0x1F)
-#define   C_0286C8_PS_GROUPING                         0xFFFFFFE0
-#define   S_0286C8_VS_GROUPING(x)                      (((x) & 0x1F) << 8)
-#define   G_0286C8_VS_GROUPING(x)                      (((x) >> 8) & 0x1F)
-#define   C_0286C8_VS_GROUPING                         0xFFFFE0FF
-#define   S_0286C8_GS_GROUPING(x)                      (((x) & 0x1F) << 16)
-#define   G_0286C8_GS_GROUPING(x)                      (((x) >> 16) & 0x1F)
-#define   C_0286C8_GS_GROUPING                         0xFFE0FFFF
-#define   S_0286C8_ES_GROUPING(x)                      (((x) & 0x1F) << 24)
-#define   G_0286C8_ES_GROUPING(x)                      (((x) >> 24) & 0x1F)
-#define   C_0286C8_ES_GROUPING                         0xE0FFFFFF
-#define R_0286D8_SPI_INPUT_Z                         0x0286D8
-#define   S_0286D8_PROVIDE_Z_TO_SPI(x)                 (((x) & 0x1) << 0)
-#define   G_0286D8_PROVIDE_Z_TO_SPI(x)                 (((x) >> 0) & 0x1)
-#define   C_0286D8_PROVIDE_Z_TO_SPI                    0xFFFFFFFE
-#define R_0286DC_SPI_FOG_CNTL                        0x0286DC
-#define   S_0286DC_PASS_FOG_THROUGH_PS(x)              (((x) & 0x1) << 0)
-#define   G_0286DC_PASS_FOG_THROUGH_PS(x)              (((x) >> 0) & 0x1)
-#define   C_0286DC_PASS_FOG_THROUGH_PS                 0xFFFFFFFE
-#define   S_0286DC_PIXEL_FOG_FUNC(x)                   (((x) & 0x3) << 1)
-#define   G_0286DC_PIXEL_FOG_FUNC(x)                   (((x) >> 1) & 0x3)
-#define   C_0286DC_PIXEL_FOG_FUNC                      0xFFFFFFF9
-#define   S_0286DC_PIXEL_FOG_SRC_SEL(x)                (((x) & 0x1) << 3)
-#define   G_0286DC_PIXEL_FOG_SRC_SEL(x)                (((x) >> 3) & 0x1)
-#define   C_0286DC_PIXEL_FOG_SRC_SEL                   0xFFFFFFF7
-#define   S_0286DC_VS_FOG_CLAMP_DISABLE(x)             (((x) & 0x1) << 4)
-#define   G_0286DC_VS_FOG_CLAMP_DISABLE(x)             (((x) >> 4) & 0x1)
-#define   C_0286DC_VS_FOG_CLAMP_DISABLE                0xFFFFFFEF
-#define R_0286E0_SPI_FOG_FUNC_SCALE                  0x0286E0
-#define   S_0286E0_VALUE(x)                            (((x) & 0xFFFFFFFF) << 0)
-#define   G_0286E0_VALUE(x)                            (((x) >> 0) & 0xFFFFFFFF)
-#define   C_0286E0_VALUE                               0x00000000
-#define R_0286E4_SPI_FOG_FUNC_BIAS                   0x0286E4
-#define   S_0286E4_VALUE(x)                            (((x) & 0xFFFFFFFF) << 0)
-#define   G_0286E4_VALUE(x)                            (((x) >> 0) & 0xFFFFFFFF)
-#define   C_0286E4_VALUE                               0x00000000
-#define R_0287A0_CB_SHADER_CONTROL                   0x0287A0
-#define   S_0287A0_RT0_ENABLE(x)                       (((x) & 0x1) << 0)
-#define   G_0287A0_RT0_ENABLE(x)                       (((x) >> 0) & 0x1)
-#define   C_0287A0_RT0_ENABLE                          0xFFFFFFFE
-#define   S_0287A0_RT1_ENABLE(x)                       (((x) & 0x1) << 1)
-#define   G_0287A0_RT1_ENABLE(x)                       (((x) >> 1) & 0x1)
-#define   C_0287A0_RT1_ENABLE                          0xFFFFFFFD
-#define   S_0287A0_RT2_ENABLE(x)                       (((x) & 0x1) << 2)
-#define   G_0287A0_RT2_ENABLE(x)                       (((x) >> 2) & 0x1)
-#define   C_0287A0_RT2_ENABLE                          0xFFFFFFFB
-#define   S_0287A0_RT3_ENABLE(x)                       (((x) & 0x1) << 3)
-#define   G_0287A0_RT3_ENABLE(x)                       (((x) >> 3) & 0x1)
-#define   C_0287A0_RT3_ENABLE                          0xFFFFFFF7
-#define   S_0287A0_RT4_ENABLE(x)                       (((x) & 0x1) << 4)
-#define   G_0287A0_RT4_ENABLE(x)                       (((x) >> 4) & 0x1)
-#define   C_0287A0_RT4_ENABLE                          0xFFFFFFEF
-#define   S_0287A0_RT5_ENABLE(x)                       (((x) & 0x1) << 5)
-#define   G_0287A0_RT5_ENABLE(x)                       (((x) >> 5) & 0x1)
-#define   C_0287A0_RT5_ENABLE                          0xFFFFFFDF
-#define   S_0287A0_RT6_ENABLE(x)                       (((x) & 0x1) << 6)
-#define   G_0287A0_RT6_ENABLE(x)                       (((x) >> 6) & 0x1)
-#define   C_0287A0_RT6_ENABLE                          0xFFFFFFBF
-#define   S_0287A0_RT7_ENABLE(x)                       (((x) & 0x1) << 7)
-#define   G_0287A0_RT7_ENABLE(x)                       (((x) >> 7) & 0x1)
-#define   C_0287A0_RT7_ENABLE                          0xFFFFFF7F
-#define R_028894_SQ_PGM_START_FS                     0x028894
-#define   S_028894_PGM_START(x)                        (((x) & 0xFFFFFFFF) << 0)
-#define   G_028894_PGM_START(x)                        (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028894_PGM_START                           0x00000000
-#define R_0288A4_SQ_PGM_RESOURCES_FS                 0x0288A4
-#define   S_0288A4_NUM_GPRS(x)                         (((x) & 0xFF) << 0)
-#define   G_0288A4_NUM_GPRS(x)                         (((x) >> 0) & 0xFF)
-#define   C_0288A4_NUM_GPRS                            0xFFFFFF00
-#define   S_0288A4_STACK_SIZE(x)                       (((x) & 0xFF) << 8)
-#define   G_0288A4_STACK_SIZE(x)                       (((x) >> 8) & 0xFF)
-#define   C_0288A4_STACK_SIZE                          0xFFFF00FF
-#define   S_0288A4_DX10_CLAMP(x)                       (((x) & 0x1) << 21)
-#define   G_0288A4_DX10_CLAMP(x)                       (((x) >> 21) & 0x1)
-#define   C_0288A4_DX10_CLAMP                          0xFFDFFFFF
-#define R_0288A8_SQ_ESGS_RING_ITEMSIZE               0x0288A8
-#define   S_0288A8_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
-#define   G_0288A8_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
-#define   C_0288A8_ITEMSIZE                            0xFFFF8000
-#define R_0288AC_SQ_GSVS_RING_ITEMSIZE               0x0288AC
-#define   S_0288AC_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
-#define   G_0288AC_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
-#define   C_0288AC_ITEMSIZE                            0xFFFF8000
-#define R_0288B0_SQ_ESTMP_RING_ITEMSIZE              0x0288B0
-#define   S_0288B0_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
-#define   G_0288B0_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
-#define   C_0288B0_ITEMSIZE                            0xFFFF8000
-#define R_0288B4_SQ_GSTMP_RING_ITEMSIZE              0x0288B4
-#define   S_0288B4_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
-#define   G_0288B4_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
-#define   C_0288B4_ITEMSIZE                            0xFFFF8000
-#define R_0288B8_SQ_VSTMP_RING_ITEMSIZE              0x0288B8
-#define   S_0288B8_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
-#define   G_0288B8_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
-#define   C_0288B8_ITEMSIZE                            0xFFFF8000
-#define R_0288BC_SQ_PSTMP_RING_ITEMSIZE              0x0288BC
-#define   S_0288BC_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
-#define   G_0288BC_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
-#define   C_0288BC_ITEMSIZE                            0xFFFF8000
-#define R_0288C0_SQ_FBUF_RING_ITEMSIZE               0x0288C0
-#define   S_0288C0_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
-#define   G_0288C0_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
-#define   C_0288C0_ITEMSIZE                            0xFFFF8000
-#define R_0288C4_SQ_REDUC_RING_ITEMSIZE              0x0288C4
-#define   S_0288C4_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
-#define   G_0288C4_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
-#define   C_0288C4_ITEMSIZE                            0xFFFF8000
-#define R_0288C8_SQ_GS_VERT_ITEMSIZE                 0x0288C8
-#define   S_0288C8_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
-#define   G_0288C8_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
-#define   C_0288C8_ITEMSIZE                            0xFFFF8000
-#define R_0288DC_SQ_PGM_CF_OFFSET_FS                 0x0288DC
-#define   S_0288DC_PGM_CF_OFFSET(x)                    (((x) & 0xFFFFF) << 0)
-#define   G_0288DC_PGM_CF_OFFSET(x)                    (((x) >> 0) & 0xFFFFF)
-#define   C_0288DC_PGM_CF_OFFSET                       0xFFF00000
-#define R_028A10_VGT_OUTPUT_PATH_CNTL                0x028A10
-#define   S_028A10_PATH_SELECT(x)                      (((x) & 0x3) << 0)
-#define   G_028A10_PATH_SELECT(x)                      (((x) >> 0) & 0x3)
-#define   C_028A10_PATH_SELECT                         0xFFFFFFFC
-#define R_028A14_VGT_HOS_CNTL                        0x028A14
-#define   S_028A14_TESS_MODE(x)                        (((x) & 0x3) << 0)
-#define   G_028A14_TESS_MODE(x)                        (((x) >> 0) & 0x3)
-#define   C_028A14_TESS_MODE                           0xFFFFFFFC
-#define R_028A18_VGT_HOS_MAX_TESS_LEVEL              0x028A18
-#define   S_028A18_MAX_TESS(x)                         (((x) & 0xFFFFFFFF) << 0)
-#define   G_028A18_MAX_TESS(x)                         (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028A18_MAX_TESS                            0x00000000
-#define R_028A1C_VGT_HOS_MIN_TESS_LEVEL              0x028A1C
-#define   S_028A1C_MIN_TESS(x)                         (((x) & 0xFFFFFFFF) << 0)
-#define   G_028A1C_MIN_TESS(x)                         (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028A1C_MIN_TESS                            0x00000000
-#define R_028A20_VGT_HOS_REUSE_DEPTH                 0x028A20
-#define   S_028A20_REUSE_DEPTH(x)                      (((x) & 0xFF) << 0)
-#define   G_028A20_REUSE_DEPTH(x)                      (((x) >> 0) & 0xFF)
-#define   C_028A20_REUSE_DEPTH                         0xFFFFFF00
-#define R_028A24_VGT_GROUP_PRIM_TYPE                 0x028A24
-#define   S_028A24_PRIM_TYPE(x)                        (((x) & 0x1F) << 0)
-#define   G_028A24_PRIM_TYPE(x)                        (((x) >> 0) & 0x1F)
-#define   C_028A24_PRIM_TYPE                           0xFFFFFFE0
-#define   S_028A24_RETAIN_ORDER(x)                     (((x) & 0x1) << 14)
-#define   G_028A24_RETAIN_ORDER(x)                     (((x) >> 14) & 0x1)
-#define   C_028A24_RETAIN_ORDER                        0xFFFFBFFF
-#define   S_028A24_RETAIN_QUADS(x)                     (((x) & 0x1) << 15)
-#define   G_028A24_RETAIN_QUADS(x)                     (((x) >> 15) & 0x1)
-#define   C_028A24_RETAIN_QUADS                        0xFFFF7FFF
-#define   S_028A24_PRIM_ORDER(x)                       (((x) & 0x7) << 16)
-#define   G_028A24_PRIM_ORDER(x)                       (((x) >> 16) & 0x7)
-#define   C_028A24_PRIM_ORDER                          0xFFF8FFFF
-#define R_028A28_VGT_GROUP_FIRST_DECR                0x028A28
-#define   S_028A28_FIRST_DECR(x)                       (((x) & 0xF) << 0)
-#define   G_028A28_FIRST_DECR(x)                       (((x) >> 0) & 0xF)
-#define   C_028A28_FIRST_DECR                          0xFFFFFFF0
-#define R_028A2C_VGT_GROUP_DECR                      0x028A2C
-#define   S_028A2C_DECR(x)                             (((x) & 0xF) << 0)
-#define   G_028A2C_DECR(x)                             (((x) >> 0) & 0xF)
-#define   C_028A2C_DECR                                0xFFFFFFF0
-#define R_028A30_VGT_GROUP_VECT_0_CNTL               0x028A30
-#define   S_028A30_COMP_X_EN(x)                        (((x) & 0x1) << 0)
-#define   G_028A30_COMP_X_EN(x)                        (((x) >> 0) & 0x1)
-#define   C_028A30_COMP_X_EN                           0xFFFFFFFE
-#define   S_028A30_COMP_Y_EN(x)                        (((x) & 0x1) << 1)
-#define   G_028A30_COMP_Y_EN(x)                        (((x) >> 1) & 0x1)
-#define   C_028A30_COMP_Y_EN                           0xFFFFFFFD
-#define   S_028A30_COMP_Z_EN(x)                        (((x) & 0x1) << 2)
-#define   G_028A30_COMP_Z_EN(x)                        (((x) >> 2) & 0x1)
-#define   C_028A30_COMP_Z_EN                           0xFFFFFFFB
-#define   S_028A30_COMP_W_EN(x)                        (((x) & 0x1) << 3)
-#define   G_028A30_COMP_W_EN(x)                        (((x) >> 3) & 0x1)
-#define   C_028A30_COMP_W_EN                           0xFFFFFFF7
-#define   S_028A30_STRIDE(x)                           (((x) & 0xFF) << 8)
-#define   G_028A30_STRIDE(x)                           (((x) >> 8) & 0xFF)
-#define   C_028A30_STRIDE                              0xFFFF00FF
-#define   S_028A30_SHIFT(x)                            (((x) & 0xFF) << 16)
-#define   G_028A30_SHIFT(x)                            (((x) >> 16) & 0xFF)
-#define   C_028A30_SHIFT                               0xFF00FFFF
-#define R_028A34_VGT_GROUP_VECT_1_CNTL               0x028A34
-#define   S_028A34_COMP_X_EN(x)                        (((x) & 0x1) << 0)
-#define   G_028A34_COMP_X_EN(x)                        (((x) >> 0) & 0x1)
-#define   C_028A34_COMP_X_EN                           0xFFFFFFFE
-#define   S_028A34_COMP_Y_EN(x)                        (((x) & 0x1) << 1)
-#define   G_028A34_COMP_Y_EN(x)                        (((x) >> 1) & 0x1)
-#define   C_028A34_COMP_Y_EN                           0xFFFFFFFD
-#define   S_028A34_COMP_Z_EN(x)                        (((x) & 0x1) << 2)
-#define   G_028A34_COMP_Z_EN(x)                        (((x) >> 2) & 0x1)
-#define   C_028A34_COMP_Z_EN                           0xFFFFFFFB
-#define   S_028A34_COMP_W_EN(x)                        (((x) & 0x1) << 3)
-#define   G_028A34_COMP_W_EN(x)                        (((x) >> 3) & 0x1)
-#define   C_028A34_COMP_W_EN                           0xFFFFFFF7
-#define   S_028A34_STRIDE(x)                           (((x) & 0xFF) << 8)
-#define   G_028A34_STRIDE(x)                           (((x) >> 8) & 0xFF)
-#define   C_028A34_STRIDE                              0xFFFF00FF
-#define   S_028A34_SHIFT(x)                            (((x) & 0xFF) << 16)
-#define   G_028A34_SHIFT(x)                            (((x) >> 16) & 0xFF)
-#define   C_028A34_SHIFT                               0xFF00FFFF
-#define R_028A38_VGT_GROUP_VECT_0_FMT_CNTL           0x028A38
-#define   S_028A38_X_CONV(x)                           (((x) & 0xF) << 0)
-#define   G_028A38_X_CONV(x)                           (((x) >> 0) & 0xF)
-#define   C_028A38_X_CONV                              0xFFFFFFF0
-#define   S_028A38_X_OFFSET(x)                         (((x) & 0xF) << 4)
-#define   G_028A38_X_OFFSET(x)                         (((x) >> 4) & 0xF)
-#define   C_028A38_X_OFFSET                            0xFFFFFF0F
-#define   S_028A38_Y_CONV(x)                           (((x) & 0xF) << 8)
-#define   G_028A38_Y_CONV(x)                           (((x) >> 8) & 0xF)
-#define   C_028A38_Y_CONV                              0xFFFFF0FF
-#define   S_028A38_Y_OFFSET(x)                         (((x) & 0xF) << 12)
-#define   G_028A38_Y_OFFSET(x)                         (((x) >> 12) & 0xF)
-#define   C_028A38_Y_OFFSET                            0xFFFF0FFF
-#define   S_028A38_Z_CONV(x)                           (((x) & 0xF) << 16)
-#define   G_028A38_Z_CONV(x)                           (((x) >> 16) & 0xF)
-#define   C_028A38_Z_CONV                              0xFFF0FFFF
-#define   S_028A38_Z_OFFSET(x)                         (((x) & 0xF) << 20)
-#define   G_028A38_Z_OFFSET(x)                         (((x) >> 20) & 0xF)
-#define   C_028A38_Z_OFFSET                            0xFF0FFFFF
-#define   S_028A38_W_CONV(x)                           (((x) & 0xF) << 24)
-#define   G_028A38_W_CONV(x)                           (((x) >> 24) & 0xF)
-#define   C_028A38_W_CONV                              0xF0FFFFFF
-#define   S_028A38_W_OFFSET(x)                         (((x) & 0xF) << 28)
-#define   G_028A38_W_OFFSET(x)                         (((x) >> 28) & 0xF)
-#define   C_028A38_W_OFFSET                            0x0FFFFFFF
-#define R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL           0x028A3C
-#define   S_028A3C_X_CONV(x)                           (((x) & 0xF) << 0)
-#define   G_028A3C_X_CONV(x)                           (((x) >> 0) & 0xF)
-#define   C_028A3C_X_CONV                              0xFFFFFFF0
-#define   S_028A3C_X_OFFSET(x)                         (((x) & 0xF) << 4)
-#define   G_028A3C_X_OFFSET(x)                         (((x) >> 4) & 0xF)
-#define   C_028A3C_X_OFFSET                            0xFFFFFF0F
-#define   S_028A3C_Y_CONV(x)                           (((x) & 0xF) << 8)
-#define   G_028A3C_Y_CONV(x)                           (((x) >> 8) & 0xF)
-#define   C_028A3C_Y_CONV                              0xFFFFF0FF
-#define   S_028A3C_Y_OFFSET(x)                         (((x) & 0xF) << 12)
-#define   G_028A3C_Y_OFFSET(x)                         (((x) >> 12) & 0xF)
-#define   C_028A3C_Y_OFFSET                            0xFFFF0FFF
-#define   S_028A3C_Z_CONV(x)                           (((x) & 0xF) << 16)
-#define   G_028A3C_Z_CONV(x)                           (((x) >> 16) & 0xF)
-#define   C_028A3C_Z_CONV                              0xFFF0FFFF
-#define   S_028A3C_Z_OFFSET(x)                         (((x) & 0xF) << 20)
-#define   G_028A3C_Z_OFFSET(x)                         (((x) >> 20) & 0xF)
-#define   C_028A3C_Z_OFFSET                            0xFF0FFFFF
-#define   S_028A3C_W_CONV(x)                           (((x) & 0xF) << 24)
-#define   G_028A3C_W_CONV(x)                           (((x) >> 24) & 0xF)
-#define   C_028A3C_W_CONV                              0xF0FFFFFF
-#define   S_028A3C_W_OFFSET(x)                         (((x) & 0xF) << 28)
-#define   G_028A3C_W_OFFSET(x)                         (((x) >> 28) & 0xF)
-#define   C_028A3C_W_OFFSET                            0x0FFFFFFF
-#define R_028A40_VGT_GS_MODE                         0x028A40
-#define   S_028A40_MODE(x)                             (((x) & 0x3) << 0)
-#define   G_028A40_MODE(x)                             (((x) >> 0) & 0x3)
-#define   C_028A40_MODE                                0xFFFFFFFC
-#define   S_028A40_ES_PASSTHRU(x)                      (((x) & 0x1) << 2)
-#define   G_028A40_ES_PASSTHRU(x)                      (((x) >> 2) & 0x1)
-#define   C_028A40_ES_PASSTHRU                         0xFFFFFFFB
-#define   S_028A40_CUT_MODE(x)                         (((x) & 0x3) << 3)
-#define   G_028A40_CUT_MODE(x)                         (((x) >> 3) & 0x3)
-#define   C_028A40_CUT_MODE                            0xFFFFFFE7
-#define R_028A4C_PA_SC_MODE_CNTL                     0x028A4C
-#define   S_028A4C_MSAA_ENABLE(x)                      (((x) & 0x1) << 0)
-#define   G_028A4C_MSAA_ENABLE(x)                      (((x) >> 0) & 0x1)
-#define   C_028A4C_MSAA_ENABLE                         0xFFFFFFFE
-#define   S_028A4C_CLIPRECT_ENABLE(x)                  (((x) & 0x1) << 1)
-#define   G_028A4C_CLIPRECT_ENABLE(x)                  (((x) >> 1) & 0x1)
-#define   C_028A4C_CLIPRECT_ENABLE                     0xFFFFFFFD
-#define   S_028A4C_LINE_STIPPLE_ENABLE(x)              (((x) & 0x1) << 2)
-#define   G_028A4C_LINE_STIPPLE_ENABLE(x)              (((x) >> 2) & 0x1)
-#define   C_028A4C_LINE_STIPPLE_ENABLE                 0xFFFFFFFB
-#define   S_028A4C_MULTI_CHIP_PRIM_DISCARD_ENAB(x)     (((x) & 0x1) << 3)
-#define   G_028A4C_MULTI_CHIP_PRIM_DISCARD_ENAB(x)     (((x) >> 3) & 0x1)
-#define   C_028A4C_MULTI_CHIP_PRIM_DISCARD_ENAB        0xFFFFFFF7
-#define   S_028A4C_WALK_ORDER_ENABLE(x)                (((x) & 0x1) << 4)
-#define   G_028A4C_WALK_ORDER_ENABLE(x)                (((x) >> 4) & 0x1)
-#define   C_028A4C_WALK_ORDER_ENABLE                   0xFFFFFFEF
-#define   S_028A4C_HALVE_DETAIL_SAMPLE_PERF(x)         (((x) & 0x1) << 5)
-#define   G_028A4C_HALVE_DETAIL_SAMPLE_PERF(x)         (((x) >> 5) & 0x1)
-#define   C_028A4C_HALVE_DETAIL_SAMPLE_PERF            0xFFFFFFDF
-#define   S_028A4C_WALK_SIZE(x)                        (((x) & 0x1) << 6)
-#define   G_028A4C_WALK_SIZE(x)                        (((x) >> 6) & 0x1)
-#define   C_028A4C_WALK_SIZE                           0xFFFFFFBF
-#define   S_028A4C_WALK_ALIGNMENT(x)                   (((x) & 0x1) << 7)
-#define   G_028A4C_WALK_ALIGNMENT(x)                   (((x) >> 7) & 0x1)
-#define   C_028A4C_WALK_ALIGNMENT                      0xFFFFFF7F
-#define   S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(x)         (((x) & 0x1) << 8)
-#define   G_028A4C_WALK_ALIGN8_PRIM_FITS_ST(x)         (((x) >> 8) & 0x1)
-#define   C_028A4C_WALK_ALIGN8_PRIM_FITS_ST            0xFFFFFEFF
-#define   S_028A4C_TILE_COVER_NO_SCISSOR(x)            (((x) & 0x1) << 9)
-#define   G_028A4C_TILE_COVER_NO_SCISSOR(x)            (((x) >> 9) & 0x1)
-#define   C_028A4C_TILE_COVER_NO_SCISSOR               0xFFFFFDFF
-#define   S_028A4C_KILL_PIX_POST_HI_Z(x)               (((x) & 0x1) << 10)
-#define   G_028A4C_KILL_PIX_POST_HI_Z(x)               (((x) >> 10) & 0x1)
-#define   C_028A4C_KILL_PIX_POST_HI_Z                  0xFFFFFBFF
-#define   S_028A4C_KILL_PIX_POST_DETAIL_MASK(x)        (((x) & 0x1) << 11)
-#define   G_028A4C_KILL_PIX_POST_DETAIL_MASK(x)        (((x) >> 11) & 0x1)
-#define   C_028A4C_KILL_PIX_POST_DETAIL_MASK           0xFFFFF7FF
-#define   S_028A4C_MULTI_CHIP_SUPERTILE_ENABLE(x)      (((x) & 0x1) << 12)
-#define   G_028A4C_MULTI_CHIP_SUPERTILE_ENABLE(x)      (((x) >> 12) & 0x1)
-#define   C_028A4C_MULTI_CHIP_SUPERTILE_ENABLE         0xFFFFEFFF
-#define   S_028A4C_TILE_COVER_DISABLE(x)               (((x) & 0x1) << 13)
-#define   G_028A4C_TILE_COVER_DISABLE(x)               (((x) >> 13) & 0x1)
-#define   C_028A4C_TILE_COVER_DISABLE                  0xFFFFDFFF
-#define   S_028A4C_FORCE_EOV_CNTDWN_ENABLE(x)          (((x) & 0x1) << 14)
-#define   G_028A4C_FORCE_EOV_CNTDWN_ENABLE(x)          (((x) >> 14) & 0x1)
-#define   C_028A4C_FORCE_EOV_CNTDWN_ENABLE             0xFFFFBFFF
-#define   S_028A4C_FORCE_EOV_TILE_ENABLE(x)            (((x) & 0x1) << 15)
-#define   G_028A4C_FORCE_EOV_TILE_ENABLE(x)            (((x) >> 15) & 0x1)
-#define   C_028A4C_FORCE_EOV_TILE_ENABLE               0xFFFF7FFF
-#define   S_028A4C_FORCE_EOV_REZ_ENABLE(x)             (((x) & 0x1) << 16)
-#define   G_028A4C_FORCE_EOV_REZ_ENABLE(x)             (((x) >> 16) & 0x1)
-#define   C_028A4C_FORCE_EOV_REZ_ENABLE                0xFFFEFFFF
-#define   S_028A4C_PS_ITER_SAMPLE(x)                   (((x) & 0x1) << 17)
-#define   G_028A4C_PS_ITER_SAMPLE(x)                   (((x) >> 17) & 0x1)
-#define   C_028A4C_PS_ITER_SAMPLE                      0xFFFDFFFF
-#define R_028A84_VGT_PRIMITIVEID_EN                  0x028A84
-#define   S_028A84_PRIMITIVEID_EN(x)                   (((x) & 0x1) << 0)
-#define   G_028A84_PRIMITIVEID_EN(x)                   (((x) >> 0) & 0x1)
-#define   C_028A84_PRIMITIVEID_EN                      0xFFFFFFFE
-#define R_028A94_VGT_MULTI_PRIM_IB_RESET_EN          0x028A94
-#define   S_028A94_RESET_EN(x)                         (((x) & 0x1) << 0)
-#define   G_028A94_RESET_EN(x)                         (((x) >> 0) & 0x1)
-#define   C_028A94_RESET_EN                            0xFFFFFFFE
-#define R_028AA0_VGT_INSTANCE_STEP_RATE_0            0x028AA0
-#define   S_028AA0_STEP_RATE(x)                        (((x) & 0xFFFFFFFF) << 0)
-#define   G_028AA0_STEP_RATE(x)                        (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028AA0_STEP_RATE                           0x00000000
-#define R_028AA4_VGT_INSTANCE_STEP_RATE_1            0x028AA4
-#define   S_028AA4_STEP_RATE(x)                        (((x) & 0xFFFFFFFF) << 0)
-#define   G_028AA4_STEP_RATE(x)                        (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028AA4_STEP_RATE                           0x00000000
-#define R_028AB0_VGT_STRMOUT_EN                      0x028AB0
-#define   S_028AB0_STREAMOUT(x)                        (((x) & 0x1) << 0)
-#define   G_028AB0_STREAMOUT(x)                        (((x) >> 0) & 0x1)
-#define   C_028AB0_STREAMOUT                           0xFFFFFFFE
-#define R_028AB4_VGT_REUSE_OFF                       0x028AB4
-#define   S_028AB4_REUSE_OFF(x)                        (((x) & 0x1) << 0)
-#define   G_028AB4_REUSE_OFF(x)                        (((x) >> 0) & 0x1)
-#define   C_028AB4_REUSE_OFF                           0xFFFFFFFE
-#define R_028AB8_VGT_VTX_CNT_EN                      0x028AB8
-#define   S_028AB8_VTX_CNT_EN(x)                       (((x) & 0x1) << 0)
-#define   G_028AB8_VTX_CNT_EN(x)                       (((x) >> 0) & 0x1)
-#define   C_028AB8_VTX_CNT_EN                          0xFFFFFFFE
-#define R_028B20_VGT_STRMOUT_BUFFER_EN               0x028B20
-#define   S_028B20_BUFFER_0_EN(x)                      (((x) & 0x1) << 0)
-#define   G_028B20_BUFFER_0_EN(x)                      (((x) >> 0) & 0x1)
-#define   C_028B20_BUFFER_0_EN                         0xFFFFFFFE
-#define   S_028B20_BUFFER_1_EN(x)                      (((x) & 0x1) << 1)
-#define   G_028B20_BUFFER_1_EN(x)                      (((x) >> 1) & 0x1)
-#define   C_028B20_BUFFER_1_EN                         0xFFFFFFFD
-#define   S_028B20_BUFFER_2_EN(x)                      (((x) & 0x1) << 2)
-#define   G_028B20_BUFFER_2_EN(x)                      (((x) >> 2) & 0x1)
-#define   C_028B20_BUFFER_2_EN                         0xFFFFFFFB
-#define   S_028B20_BUFFER_3_EN(x)                      (((x) & 0x1) << 3)
-#define   G_028B20_BUFFER_3_EN(x)                      (((x) >> 3) & 0x1)
-#define   C_028B20_BUFFER_3_EN                         0xFFFFFFF7
-#define R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX    0x028C20
-#define   S_028C20_S4_X(x)                             (((x) & 0xF) << 0)
-#define   G_028C20_S4_X(x)                             (((x) >> 0) & 0xF)
-#define   C_028C20_S4_X                                0xFFFFFFF0
-#define   S_028C20_S4_Y(x)                             (((x) & 0xF) << 4)
-#define   G_028C20_S4_Y(x)                             (((x) >> 4) & 0xF)
-#define   C_028C20_S4_Y                                0xFFFFFF0F
-#define   S_028C20_S5_X(x)                             (((x) & 0xF) << 8)
-#define   G_028C20_S5_X(x)                             (((x) >> 8) & 0xF)
-#define   C_028C20_S5_X                                0xFFFFF0FF
-#define   S_028C20_S5_Y(x)                             (((x) & 0xF) << 12)
-#define   G_028C20_S5_Y(x)                             (((x) >> 12) & 0xF)
-#define   C_028C20_S5_Y                                0xFFFF0FFF
-#define   S_028C20_S6_X(x)                             (((x) & 0xF) << 16)
-#define   G_028C20_S6_X(x)                             (((x) >> 16) & 0xF)
-#define   C_028C20_S6_X                                0xFFF0FFFF
-#define   S_028C20_S6_Y(x)                             (((x) & 0xF) << 20)
-#define   G_028C20_S6_Y(x)                             (((x) >> 20) & 0xF)
-#define   C_028C20_S6_Y                                0xFF0FFFFF
-#define   S_028C20_S7_X(x)                             (((x) & 0xF) << 24)
-#define   G_028C20_S7_X(x)                             (((x) >> 24) & 0xF)
-#define   C_028C20_S7_X                                0xF0FFFFFF
-#define   S_028C20_S7_Y(x)                             (((x) & 0xF) << 28)
-#define   G_028C20_S7_Y(x)                             (((x) >> 28) & 0xF)
-#define   C_028C20_S7_Y                                0x0FFFFFFF
-#define R_028C30_CB_CLRCMP_CONTROL                   0x028C30
-#define   S_028C30_CLRCMP_FCN_SRC(x)                   (((x) & 0x7) << 0)
-#define   G_028C30_CLRCMP_FCN_SRC(x)                   (((x) >> 0) & 0x7)
-#define   C_028C30_CLRCMP_FCN_SRC                      0xFFFFFFF8
-#define   S_028C30_CLRCMP_FCN_DST(x)                   (((x) & 0x7) << 8)
-#define   G_028C30_CLRCMP_FCN_DST(x)                   (((x) >> 8) & 0x7)
-#define   C_028C30_CLRCMP_FCN_DST                      0xFFFFF8FF
-#define   S_028C30_CLRCMP_FCN_SEL(x)                   (((x) & 0x3) << 24)
-#define   G_028C30_CLRCMP_FCN_SEL(x)                   (((x) >> 24) & 0x3)
-#define   C_028C30_CLRCMP_FCN_SEL                      0xFCFFFFFF
-#define R_028C34_CB_CLRCMP_SRC                       0x028C34
-#define   S_028C34_CLRCMP_SRC(x)                       (((x) & 0xFFFFFFFF) << 0)
-#define   G_028C34_CLRCMP_SRC(x)                       (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028C34_CLRCMP_SRC                          0x00000000
-#define R_028C38_CB_CLRCMP_DST                       0x028C38
-#define   S_028C38_CLRCMP_DST(x)                       (((x) & 0xFFFFFFFF) << 0)
-#define   G_028C38_CLRCMP_DST(x)                       (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028C38_CLRCMP_DST                          0x00000000
-#define R_028C3C_CB_CLRCMP_MSK                       0x028C3C
-#define   S_028C3C_CLRCMP_MSK(x)                       (((x) & 0xFFFFFFFF) << 0)
-#define   G_028C3C_CLRCMP_MSK(x)                       (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028C3C_CLRCMP_MSK                          0x00000000
-#define R_0085F0_CP_COHER_CNTL                       0x0085F0
-#define   S_0085F0_DEST_BASE_0_ENA(x)                  (((x) & 0x1) << 0)
-#define   G_0085F0_DEST_BASE_0_ENA(x)                  (((x) >> 0) & 0x1)
-#define   C_0085F0_DEST_BASE_0_ENA                     0xFFFFFFFE
-#define   S_0085F0_DEST_BASE_1_ENA(x)                  (((x) & 0x1) << 1)
-#define   G_0085F0_DEST_BASE_1_ENA(x)                  (((x) >> 1) & 0x1)
-#define   C_0085F0_DEST_BASE_1_ENA                     0xFFFFFFFD
-#define   S_0085F0_SO0_DEST_BASE_ENA(x)                (((x) & 0x1) << 2)
-#define   G_0085F0_SO0_DEST_BASE_ENA(x)                (((x) >> 2) & 0x1)
-#define   C_0085F0_SO0_DEST_BASE_ENA                   0xFFFFFFFB
-#define   S_0085F0_SO1_DEST_BASE_ENA(x)                (((x) & 0x1) << 3)
-#define   G_0085F0_SO1_DEST_BASE_ENA(x)                (((x) >> 3) & 0x1)
-#define   C_0085F0_SO1_DEST_BASE_ENA                   0xFFFFFFF7
-#define   S_0085F0_SO2_DEST_BASE_ENA(x)                (((x) & 0x1) << 4)
-#define   G_0085F0_SO2_DEST_BASE_ENA(x)                (((x) >> 4) & 0x1)
-#define   C_0085F0_SO2_DEST_BASE_ENA                   0xFFFFFFEF
-#define   S_0085F0_SO3_DEST_BASE_ENA(x)                (((x) & 0x1) << 5)
-#define   G_0085F0_SO3_DEST_BASE_ENA(x)                (((x) >> 5) & 0x1)
-#define   C_0085F0_SO3_DEST_BASE_ENA                   0xFFFFFFDF
-#define   S_0085F0_CB0_DEST_BASE_ENA(x)                (((x) & 0x1) << 6)
-#define   G_0085F0_CB0_DEST_BASE_ENA(x)                (((x) >> 6) & 0x1)
-#define   C_0085F0_CB0_DEST_BASE_ENA                   0xFFFFFFBF
-#define   S_0085F0_CB1_DEST_BASE_ENA(x)                (((x) & 0x1) << 7)
-#define   G_0085F0_CB1_DEST_BASE_ENA(x)                (((x) >> 7) & 0x1)
-#define   C_0085F0_CB1_DEST_BASE_ENA                   0xFFFFFF7F
-#define   S_0085F0_CB2_DEST_BASE_ENA(x)                (((x) & 0x1) << 8)
-#define   G_0085F0_CB2_DEST_BASE_ENA(x)                (((x) >> 8) & 0x1)
-#define   C_0085F0_CB2_DEST_BASE_ENA                   0xFFFFFEFF
-#define   S_0085F0_CB3_DEST_BASE_ENA(x)                (((x) & 0x1) << 9)
-#define   G_0085F0_CB3_DEST_BASE_ENA(x)                (((x) >> 9) & 0x1)
-#define   C_0085F0_CB3_DEST_BASE_ENA                   0xFFFFFDFF
-#define   S_0085F0_CB4_DEST_BASE_ENA(x)                (((x) & 0x1) << 10)
-#define   G_0085F0_CB4_DEST_BASE_ENA(x)                (((x) >> 10) & 0x1)
-#define   C_0085F0_CB4_DEST_BASE_ENA                   0xFFFFFBFF
-#define   S_0085F0_CB5_DEST_BASE_ENA(x)                (((x) & 0x1) << 11)
-#define   G_0085F0_CB5_DEST_BASE_ENA(x)                (((x) >> 11) & 0x1)
-#define   C_0085F0_CB5_DEST_BASE_ENA                   0xFFFFF7FF
-#define   S_0085F0_CB6_DEST_BASE_ENA(x)                (((x) & 0x1) << 12)
-#define   G_0085F0_CB6_DEST_BASE_ENA(x)                (((x) >> 12) & 0x1)
-#define   C_0085F0_CB6_DEST_BASE_ENA                   0xFFFFEFFF
-#define   S_0085F0_CB7_DEST_BASE_ENA(x)                (((x) & 0x1) << 13)
-#define   G_0085F0_CB7_DEST_BASE_ENA(x)                (((x) >> 13) & 0x1)
-#define   C_0085F0_CB7_DEST_BASE_ENA                   0xFFFFDFFF
-#define   S_0085F0_DB_DEST_BASE_ENA(x)                 (((x) & 0x1) << 14)
-#define   G_0085F0_DB_DEST_BASE_ENA(x)                 (((x) >> 14) & 0x1)
-#define   C_0085F0_DB_DEST_BASE_ENA                    0xFFFFBFFF
-#define   S_0085F0_CR_DEST_BASE_ENA(x)                 (((x) & 0x1) << 15)
-#define   G_0085F0_CR_DEST_BASE_ENA(x)                 (((x) >> 15) & 0x1)
-#define   C_0085F0_CR_DEST_BASE_ENA                    0xFFFF7FFF
-#define   S_0085F0_TC_ACTION_ENA(x)                    (((x) & 0x1) << 23)
-#define   G_0085F0_TC_ACTION_ENA(x)                    (((x) >> 23) & 0x1)
-#define   C_0085F0_TC_ACTION_ENA                       0xFF7FFFFF
-#define   S_0085F0_VC_ACTION_ENA(x)                    (((x) & 0x1) << 24)
-#define   G_0085F0_VC_ACTION_ENA(x)                    (((x) >> 24) & 0x1)
-#define   C_0085F0_VC_ACTION_ENA                       0xFEFFFFFF
-#define   S_0085F0_CB_ACTION_ENA(x)                    (((x) & 0x1) << 25)
-#define   G_0085F0_CB_ACTION_ENA(x)                    (((x) >> 25) & 0x1)
-#define   C_0085F0_CB_ACTION_ENA                       0xFDFFFFFF
-#define   S_0085F0_DB_ACTION_ENA(x)                    (((x) & 0x1) << 26)
-#define   G_0085F0_DB_ACTION_ENA(x)                    (((x) >> 26) & 0x1)
-#define   C_0085F0_DB_ACTION_ENA                       0xFBFFFFFF
-#define   S_0085F0_SH_ACTION_ENA(x)                    (((x) & 0x1) << 27)
-#define   G_0085F0_SH_ACTION_ENA(x)                    (((x) >> 27) & 0x1)
-#define   C_0085F0_SH_ACTION_ENA                       0xF7FFFFFF
-#define   S_0085F0_SMX_ACTION_ENA(x)                   (((x) & 0x1) << 28)
-#define   G_0085F0_SMX_ACTION_ENA(x)                   (((x) >> 28) & 0x1)
-#define   C_0085F0_SMX_ACTION_ENA                      0xEFFFFFFF
-#define   S_0085F0_CR0_ACTION_ENA(x)                   (((x) & 0x1) << 29)
-#define   G_0085F0_CR0_ACTION_ENA(x)                   (((x) >> 29) & 0x1)
-#define   C_0085F0_CR0_ACTION_ENA                      0xDFFFFFFF
-#define   S_0085F0_CR1_ACTION_ENA(x)                   (((x) & 0x1) << 30)
-#define   G_0085F0_CR1_ACTION_ENA(x)                   (((x) >> 30) & 0x1)
-#define   C_0085F0_CR1_ACTION_ENA                      0xBFFFFFFF
-#define   S_0085F0_CR2_ACTION_ENA(x)                   (((x) & 0x1) << 31)
-#define   G_0085F0_CR2_ACTION_ENA(x)                   (((x) >> 31) & 0x1)
-#define   C_0085F0_CR2_ACTION_ENA                      0x7FFFFFFF
-
-
-#define R_02812C_CB_CLEAR_ALPHA                      0x02812C
-#define   S_02812C_CLEAR_ALPHA(x)                      (((x) & 0xFFFFFFFF) << 0)
-#define   G_02812C_CLEAR_ALPHA(x)                      (((x) >> 0) & 0xFFFFFFFF)
-#define   C_02812C_CLEAR_ALPHA                         0x00000000
-#define R_028128_CB_CLEAR_BLUE                       0x028128
-#define   S_028128_CLEAR_BLUE(x)                       (((x) & 0xFFFFFFFF) << 0)
-#define   G_028128_CLEAR_BLUE(x)                       (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028128_CLEAR_BLUE                          0x00000000
-#define R_028124_CB_CLEAR_GREEN                      0x028124
-#define   S_028124_CLEAR_GREEN(x)                      (((x) & 0xFFFFFFFF) << 0)
-#define   G_028124_CLEAR_GREEN(x)                      (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028124_CLEAR_GREEN                         0x00000000
-#define R_028120_CB_CLEAR_RED                        0x028120
-#define   S_028120_CLEAR_RED(x)                        (((x) & 0xFFFFFFFF) << 0)
-#define   G_028120_CLEAR_RED(x)                        (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028120_CLEAR_RED                           0x00000000
-#define R_02842C_CB_FOG_BLUE                         0x02842C
-#define   S_02842C_FOG_BLUE(x)                         (((x) & 0xFFFFFFFF) << 0)
-#define   G_02842C_FOG_BLUE(x)                         (((x) >> 0) & 0xFFFFFFFF)
-#define   C_02842C_FOG_BLUE                            0x00000000
-#define R_028428_CB_FOG_GREEN                        0x028428
-#define   S_028428_FOG_GREEN(x)                        (((x) & 0xFFFFFFFF) << 0)
-#define   G_028428_FOG_GREEN(x)                        (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028428_FOG_GREEN                           0x00000000
-#define R_028424_CB_FOG_RED                          0x028424
-#define   S_028424_FOG_RED(x)                          (((x) & 0xFFFFFFFF) << 0)
-#define   G_028424_FOG_RED(x)                          (((x) >> 0) & 0xFFFFFFFF)
-#define   C_028424_FOG_RED                             0x00000000
-#define R_03C000_SQ_TEX_SAMPLER_WORD0_0              0x03C000
-#define   S_03C000_CLAMP_X(x)                          (((x) & 0x7) << 0)
-#define   G_03C000_CLAMP_X(x)                          (((x) >> 0) & 0x7)
-#define   C_03C000_CLAMP_X                             0xFFFFFFF8
-#define   S_03C000_CLAMP_Y(x)                          (((x) & 0x7) << 3)
-#define   G_03C000_CLAMP_Y(x)                          (((x) >> 3) & 0x7)
-#define   C_03C000_CLAMP_Y                             0xFFFFFFC7
-#define   S_03C000_CLAMP_Z(x)                          (((x) & 0x7) << 6)
-#define   G_03C000_CLAMP_Z(x)                          (((x) >> 6) & 0x7)
-#define   C_03C000_CLAMP_Z                             0xFFFFFE3F
-#define   S_03C000_XY_MAG_FILTER(x)                    (((x) & 0x7) << 9)
-#define   G_03C000_XY_MAG_FILTER(x)                    (((x) >> 9) & 0x7)
-#define   C_03C000_XY_MAG_FILTER                       0xFFFFF1FF
-#define   S_03C000_XY_MIN_FILTER(x)                    (((x) & 0x7) << 12)
-#define   G_03C000_XY_MIN_FILTER(x)                    (((x) >> 12) & 0x7)
-#define   C_03C000_XY_MIN_FILTER                       0xFFFF8FFF
-#define   S_03C000_Z_FILTER(x)                         (((x) & 0x3) << 15)
-#define   G_03C000_Z_FILTER(x)                         (((x) >> 15) & 0x3)
-#define   C_03C000_Z_FILTER                            0xFFFE7FFF
-#define   S_03C000_MIP_FILTER(x)                       (((x) & 0x3) << 17)
-#define   G_03C000_MIP_FILTER(x)                       (((x) >> 17) & 0x3)
-#define   C_03C000_MIP_FILTER                          0xFFF9FFFF
-#define   S_03C000_BORDER_COLOR_TYPE(x)                (((x) & 0x3) << 22)
-#define   G_03C000_BORDER_COLOR_TYPE(x)                (((x) >> 22) & 0x3)
-#define   C_03C000_BORDER_COLOR_TYPE                   0xFF3FFFFF
-#define   S_03C000_POINT_SAMPLING_CLAMP(x)             (((x) & 0x1) << 24)
-#define   G_03C000_POINT_SAMPLING_CLAMP(x)             (((x) >> 24) & 0x1)
-#define   C_03C000_POINT_SAMPLING_CLAMP                0xFEFFFFFF
-#define   S_03C000_TEX_ARRAY_OVERRIDE(x)               (((x) & 0x1) << 25)
-#define   G_03C000_TEX_ARRAY_OVERRIDE(x)               (((x) >> 25) & 0x1)
-#define   C_03C000_TEX_ARRAY_OVERRIDE                  0xFDFFFFFF
-#define   S_03C000_DEPTH_COMPARE_FUNCTION(x)           (((x) & 0x7) << 26)
-#define   G_03C000_DEPTH_COMPARE_FUNCTION(x)           (((x) >> 26) & 0x7)
-#define   C_03C000_DEPTH_COMPARE_FUNCTION              0xE3FFFFFF
-#define   S_03C000_CHROMA_KEY(x)                       (((x) & 0x3) << 29)
-#define   G_03C000_CHROMA_KEY(x)                       (((x) >> 29) & 0x3)
-#define   C_03C000_CHROMA_KEY                          0x9FFFFFFF
-#define   S_03C000_LOD_USES_MINOR_AXIS(x)              (((x) & 0x1) << 31)
-#define   G_03C000_LOD_USES_MINOR_AXIS(x)              (((x) >> 31) & 0x1)
-#define   C_03C000_LOD_USES_MINOR_AXIS                 0x7FFFFFFF
-#define R_03C004_SQ_TEX_SAMPLER_WORD1_0              0x03C004
-#define   S_03C004_MIN_LOD(x)                          (((x) & 0x3FF) << 0)
-#define   G_03C004_MIN_LOD(x)                          (((x) >> 0) & 0x3FF)
-#define   C_03C004_MIN_LOD                             0xFFFFFC00
-#define   S_03C004_MAX_LOD(x)                          (((x) & 0x3FF) << 10)
-#define   G_03C004_MAX_LOD(x)                          (((x) >> 10) & 0x3FF)
-#define   C_03C004_MAX_LOD                             0xFFF003FF
-#define   S_03C004_LOD_BIAS(x)                         (((x) & 0xFFF) << 20)
-#define   G_03C004_LOD_BIAS(x)                         (((x) >> 20) & 0xFFF)
-#define   C_03C004_LOD_BIAS                            0x000FFFFF
-#define R_03C008_SQ_TEX_SAMPLER_WORD2_0              0x03C008
-#define   S_03C008_LOD_BIAS_SEC(x)                     (((x) & 0xFFF) << 0)
-#define   G_03C008_LOD_BIAS_SEC(x)                     (((x) >> 0) & 0xFFF)
-#define   C_03C008_LOD_BIAS_SEC                        0xFFFFF000
-#define   S_03C008_MC_COORD_TRUNCATE(x)                (((x) & 0x1) << 12)
-#define   G_03C008_MC_COORD_TRUNCATE(x)                (((x) >> 12) & 0x1)
-#define   C_03C008_MC_COORD_TRUNCATE                   0xFFFFEFFF
-#define   S_03C008_FORCE_DEGAMMA(x)                    (((x) & 0x1) << 13)
-#define   G_03C008_FORCE_DEGAMMA(x)                    (((x) >> 13) & 0x1)
-#define   C_03C008_FORCE_DEGAMMA                       0xFFFFDFFF
-#define   S_03C008_HIGH_PRECISION_FILTER(x)            (((x) & 0x1) << 14)
-#define   G_03C008_HIGH_PRECISION_FILTER(x)            (((x) >> 14) & 0x1)
-#define   C_03C008_HIGH_PRECISION_FILTER               0xFFFFBFFF
-#define   S_03C008_PERF_MIP(x)                         (((x) & 0x7) << 15)
-#define   G_03C008_PERF_MIP(x)                         (((x) >> 15) & 0x7)
-#define   C_03C008_PERF_MIP                            0xFFFC7FFF
-#define   S_03C008_PERF_Z(x)                           (((x) & 0x3) << 18)
-#define   G_03C008_PERF_Z(x)                           (((x) >> 18) & 0x3)
-#define   C_03C008_PERF_Z                              0xFFF3FFFF
-#define   S_03C008_FETCH_4(x)                          (((x) & 0x1) << 26)
-#define   G_03C008_FETCH_4(x)                          (((x) >> 26) & 0x1)
-#define   C_03C008_FETCH_4                             0xFBFFFFFF
-#define   S_03C008_SAMPLE_IS_PCF(x)                    (((x) & 0x1) << 27)
-#define   G_03C008_SAMPLE_IS_PCF(x)                    (((x) >> 27) & 0x1)
-#define   C_03C008_SAMPLE_IS_PCF                       0xF7FFFFFF
-#define   S_03C008_TYPE(x)                             (((x) & 0x1) << 31)
-#define   G_03C008_TYPE(x)                             (((x) >> 31) & 0x1)
-#define   C_03C008_TYPE                                0x7FFFFFFF
-#define R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA         0x00A40C
-#define   S_00A40C_BORDER_ALPHA(x)                     (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A40C_BORDER_ALPHA(x)                     (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A40C_BORDER_ALPHA                        0x00000000
-#define R_00A408_TD_PS_SAMPLER0_BORDER_BLUE          0x00A408
-#define   S_00A408_BORDER_BLUE(x)                      (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A408_BORDER_BLUE(x)                      (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A408_BORDER_BLUE                         0x00000000
-#define R_00A404_TD_PS_SAMPLER0_BORDER_GREEN         0x00A404
-#define   S_00A404_BORDER_GREEN(x)                     (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A404_BORDER_GREEN(x)                     (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A404_BORDER_GREEN                        0x00000000
-#define R_00A400_TD_PS_SAMPLER0_BORDER_RED           0x00A400
-#define   S_00A400_BORDER_RED(x)                       (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A400_BORDER_RED(x)                       (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A400_BORDER_RED                          0x00000000
-#define R_00A60C_TD_VS_SAMPLER0_BORDER_ALPHA         0x00A60C
-#define   S_00A60C_BORDER_ALPHA(x)                     (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A60C_BORDER_ALPHA(x)                     (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A60C_BORDER_ALPHA                        0x00000000
-#define R_00A608_TD_VS_SAMPLER0_BORDER_BLUE          0x00A608
-#define   S_00A608_BORDER_BLUE(x)                      (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A608_BORDER_BLUE(x)                      (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A608_BORDER_BLUE                         0x00000000
-#define R_00A604_TD_VS_SAMPLER0_BORDER_GREEN         0x00A604
-#define   S_00A604_BORDER_GREEN(x)                     (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A604_BORDER_GREEN(x)                     (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A604_BORDER_GREEN                        0x00000000
-#define R_00A600_TD_VS_SAMPLER0_BORDER_RED           0x00A600
-#define   S_00A600_BORDER_RED(x)                       (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A600_BORDER_RED(x)                       (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A600_BORDER_RED                          0x00000000
-#define R_00A80C_TD_GS_SAMPLER0_BORDER_ALPHA         0x00A80C
-#define   S_00A80C_BORDER_ALPHA(x)                     (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A80C_BORDER_ALPHA(x)                     (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A80C_BORDER_ALPHA                        0x00000000
-#define R_00A808_TD_GS_SAMPLER0_BORDER_BLUE          0x00A808
-#define   S_00A808_BORDER_BLUE(x)                      (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A808_BORDER_BLUE(x)                      (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A808_BORDER_BLUE                         0x00000000
-#define R_00A804_TD_GS_SAMPLER0_BORDER_GREEN         0x00A804
-#define   S_00A804_BORDER_GREEN(x)                     (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A804_BORDER_GREEN(x)                     (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A804_BORDER_GREEN                        0x00000000
-#define R_00A800_TD_GS_SAMPLER0_BORDER_RED           0x00A800
-#define   S_00A800_BORDER_RED(x)                       (((x) & 0xFFFFFFFF) << 0)
-#define   G_00A800_BORDER_RED(x)                       (((x) >> 0) & 0xFFFFFFFF)
-#define   C_00A800_BORDER_RED                          0x00000000
-#define R_030000_SQ_ALU_CONSTANT0_0                  0x030000
-#define   S_030000_X(x)                                (((x) & 0xFFFFFFFF) << 0)
-#define   G_030000_X(x)                                (((x) >> 0) & 0xFFFFFFFF)
-#define   C_030000_X                                   0x00000000
-#define R_030004_SQ_ALU_CONSTANT1_0                  0x030004
-#define   S_030004_Y(x)                                (((x) & 0xFFFFFFFF) << 0)
-#define   G_030004_Y(x)                                (((x) >> 0) & 0xFFFFFFFF)
-#define   C_030004_Y                                   0x00000000
-#define R_030008_SQ_ALU_CONSTANT2_0                  0x030008
-#define   S_030008_Z(x)                                (((x) & 0xFFFFFFFF) << 0)
-#define   G_030008_Z(x)                                (((x) >> 0) & 0xFFFFFFFF)
-#define   C_030008_Z                                   0x00000000
-#define R_03000C_SQ_ALU_CONSTANT3_0                  0x03000C
-#define   S_03000C_W(x)                                (((x) & 0xFFFFFFFF) << 0)
-#define   G_03000C_W(x)                                (((x) >> 0) & 0xFFFFFFFF)
-#define   C_03000C_W                                   0x00000000
-#define R_0287E4_VGT_DMA_BASE_HI                     0x0287E4
-#define R_0287E8_VGT_DMA_BASE                        0x0287E8
-#define R_028E20_PA_CL_UCP0_X                        0x028E20
-#define R_028E24_PA_CL_UCP0_Y                        0x028E24
-#define R_028E28_PA_CL_UCP0_Z                        0x028E28
-#define R_028E2C_PA_CL_UCP0_W                        0x028E2C
-#define R_028E30_PA_CL_UCP1_X                        0x028E30
-#define R_028E34_PA_CL_UCP1_Y                        0x028E34
-#define R_028E38_PA_CL_UCP1_Z                        0x028E38
-#define R_028E3C_PA_CL_UCP1_W                        0x028E3C
-#define R_028E40_PA_CL_UCP2_X                        0x028E40
-#define R_028E44_PA_CL_UCP2_Y                        0x028E44
-#define R_028E48_PA_CL_UCP2_Z                        0x028E48
-#define R_028E4C_PA_CL_UCP2_W                        0x028E4C
-#define R_028E50_PA_CL_UCP3_X                        0x028E50
-#define R_028E54_PA_CL_UCP3_Y                        0x028E54
-#define R_028E58_PA_CL_UCP3_Z                        0x028E58
-#define R_028E5C_PA_CL_UCP3_W                        0x028E5C
-#define R_028E60_PA_CL_UCP4_X                        0x028E60
-#define R_028E64_PA_CL_UCP4_Y                        0x028E64
-#define R_028E68_PA_CL_UCP4_Z                        0x028E68
-#define R_028E6C_PA_CL_UCP4_W                        0x028E6C
-#define R_028E70_PA_CL_UCP5_X                        0x028E70
-#define R_028E74_PA_CL_UCP5_Y                        0x028E74
-#define R_028E78_PA_CL_UCP5_Z                        0x028E78
-#define R_028E7C_PA_CL_UCP5_W                        0x028E7C
-#define R_038000_RESOURCE0_WORD0                     0x038000
-#define R_038004_RESOURCE0_WORD1                     0x038004
-#define R_038008_RESOURCE0_WORD2                     0x038008
-#define R_03800C_RESOURCE0_WORD3                     0x03800C
-#define R_038010_RESOURCE0_WORD4                     0x038010
-#define R_038014_RESOURCE0_WORD5                     0x038014
-#define R_038018_RESOURCE0_WORD6                     0x038018
-
-#define R_028140_ALU_CONST_BUFFER_SIZE_PS_0          0x00028140
-#define R_028180_ALU_CONST_BUFFER_SIZE_VS_0          0x00028180
-#define R_028940_ALU_CONST_CACHE_PS_0                0x00028940
-#define R_028980_ALU_CONST_CACHE_VS_0                0x00028980
-
-#define R_03CFF0_SQ_VTX_BASE_VTX_LOC                 0x03CFF0
-#define R_03CFF4_SQ_VTX_START_INST_LOC               0x03CFF4
-
-#endif

From 233dd4953e3e6cf39f3c7a7cd898339a89d2ff86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Wed, 10 Aug 2011 02:58:40 +0200
Subject: [PATCH 342/600] u_blitter: rename util_blitter_copy_region ->
 util_blitter_copy_texture

---
 src/gallium/auxiliary/util/u_blitter.c  | 16 ++++++++--------
 src/gallium/auxiliary/util/u_blitter.h  | 21 ++++++++++++---------
 src/gallium/drivers/i915/i915_surface.c |  2 +-
 src/gallium/drivers/nvfx/nvfx_surface.c |  2 +-
 src/gallium/drivers/r300/r300_blit.c    |  4 ++--
 src/gallium/drivers/r600/r600_blit.c    |  4 ++--
 6 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 528f344a0f7..7ecb76f38f2 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -724,14 +724,14 @@ boolean is_overlap(unsigned sx1, unsigned sx2, unsigned sy1, unsigned sy2,
    return sx1 < dx2 && sx2 > dx1 && sy1 < dy2 && sy2 > dy1;
 }
 
-void util_blitter_copy_region(struct blitter_context *blitter,
-                              struct pipe_resource *dst,
-                              unsigned dstlevel,
-                              unsigned dstx, unsigned dsty, unsigned dstz,
-                              struct pipe_resource *src,
-                              unsigned srclevel,
-                              const struct pipe_box *srcbox,
-                              boolean ignore_stencil)
+void util_blitter_copy_texture(struct blitter_context *blitter,
+                               struct pipe_resource *dst,
+                               unsigned dstlevel,
+                               unsigned dstx, unsigned dsty, unsigned dstz,
+                               struct pipe_resource *src,
+                               unsigned srclevel,
+                               const struct pipe_box *srcbox,
+                               boolean ignore_stencil)
 {
    struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
    struct pipe_context *pipe = ctx->base.pipe;
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index 41470d92bba..df6f023a638 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -126,12 +126,15 @@ struct pipe_context *util_blitter_get_pipe(struct blitter_context *blitter)
 }
 
 /*
- * These CSOs must be saved before any of the following functions is called:
+ * These states must be saved before any of the following functions is called:
  * - blend state
  * - depth stencil alpha state
  * - rasterizer state
  * - vertex shader
+ * - any other shader??? (XXX)
  * - fragment shader
+ * - vertex buffers
+ * - vertex elements
  */
 
 /**
@@ -169,14 +172,14 @@ void util_blitter_clear_depth_custom(struct blitter_context *blitter,
  * - fragment sampler states
  * - fragment sampler textures
  */
-void util_blitter_copy_region(struct blitter_context *blitter,
-                              struct pipe_resource *dst,
-                              unsigned dstlevel,
-                              unsigned dstx, unsigned dsty, unsigned dstz,
-                              struct pipe_resource *src,
-                              unsigned srclevel,
-                              const struct pipe_box *srcbox,
-                              boolean ignore_stencil);
+void util_blitter_copy_texture(struct blitter_context *blitter,
+                               struct pipe_resource *dst,
+                               unsigned dstlevel,
+                               unsigned dstx, unsigned dsty, unsigned dstz,
+                               struct pipe_resource *src,
+                               unsigned srclevel,
+                               const struct pipe_box *srcbox,
+                               boolean ignore_stencil);
 
 /**
  * Clear a region of a (color) surface to a constant value.
diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c
index ac6e94500c8..41146be9311 100644
--- a/src/gallium/drivers/i915/i915_surface.c
+++ b/src/gallium/drivers/i915/i915_surface.c
@@ -80,7 +80,7 @@ i915_surface_copy_render(struct pipe_context *pipe,
                                             i915->saved_nr_sampler_views,
                                             i915->saved_sampler_views);
 
-   util_blitter_copy_region(i915->blitter, dst, dst_level, dstx, dsty, dstz,
+   util_blitter_copy_texture(i915->blitter, dst, dst_level, dstx, dsty, dstz,
                             src, src_level, src_box, TRUE);
 }
 
diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c
index 339906e6a63..04b0304b44f 100644
--- a/src/gallium/drivers/nvfx/nvfx_surface.c
+++ b/src/gallium/drivers/nvfx/nvfx_surface.c
@@ -288,7 +288,7 @@ nvfx_resource_copy_region(struct pipe_context *pipe,
 		 * TODO: perhaps support reinterpreting the formats
 		 */
 		struct blitter_context* blitter = nvfx_get_blitter(pipe, 1);
-		util_blitter_copy_region(blitter, dstr, dst_level, dstx, dsty, dstz, srcr, src_level, src_box, TRUE);
+		util_blitter_copy_texture(blitter, dstr, dst_level, dstx, dsty, dstz, srcr, src_level, src_box, TRUE);
 		nvfx_put_blitter(pipe, blitter);
 	}
 	else
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index e7a926829d1..ddf5448a34b 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -445,8 +445,8 @@ static void r300_hw_copy_region(struct pipe_context* pipe,
     struct r300_context* r300 = r300_context(pipe);
 
     r300_blitter_begin(r300, R300_COPY);
-    util_blitter_copy_region(r300->blitter, dst, dst_level, dstx, dsty, dstz,
-                             src, src_level, src_box, TRUE);
+    util_blitter_copy_texture(r300->blitter, dst, dst_level, dstx, dsty, dstz,
+                              src, src_level, src_box, TRUE);
     r300_blitter_end(r300);
 }
 
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 35e68b6e222..e1cf585234e 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -233,8 +233,8 @@ static void r600_hw_copy_region(struct pipe_context *ctx,
 	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
 
 	r600_blitter_begin(ctx, R600_COPY);
-	util_blitter_copy_region(rctx->blitter, dst, dst_level, dstx, dsty, dstz,
-				 src, src_level, src_box, TRUE);
+	util_blitter_copy_texture(rctx->blitter, dst, dst_level, dstx, dsty, dstz,
+				  src, src_level, src_box, TRUE);
 	r600_blitter_end(ctx);
 }
 

From 363295d7209636c56ee0cb1246915b03b7f73524 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Mon, 15 Aug 2011 20:52:44 +0200
Subject: [PATCH 343/600] u_blitter: restore some states conditionally

---
 src/gallium/auxiliary/util/u_blitter.c | 29 +++++++++++++++-----------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 7ecb76f38f2..d8e46f07c88 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -26,8 +26,8 @@
 
 /**
  * @file
- * Blitter utility to facilitate acceleration of the clear, clear_render_target, clear_depth_stencil
- * resource_copy_region functions.
+ * Blitter utility to facilitate acceleration of the clear, clear_render_target,
+ * clear_depth_stencil, and resource_copy_region functions.
  *
  * @author Marek Olšák
  */
@@ -197,8 +197,6 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
    memset(&velem[0], 0, sizeof(velem[0]) * 2);
    for (i = 0; i < 2; i++) {
       velem[i].src_offset = i * 4 * sizeof(float);
-      velem[i].instance_divisor = 0;
-      velem[i].vertex_buffer_index = 0;
       velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
    }
    ctx->velem_state = pipe->create_vertex_elements_state(pipe, 2, &velem[0]);
@@ -288,26 +286,33 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
    unsigned i;
 
    /* restore the state objects which are always required to be saved */
-   pipe->bind_blend_state(pipe, ctx->base.saved_blend_state);
-   pipe->bind_depth_stencil_alpha_state(pipe, ctx->base.saved_dsa_state);
    pipe->bind_rasterizer_state(pipe, ctx->base.saved_rs_state);
-   pipe->bind_fs_state(pipe, ctx->base.saved_fs);
    pipe->bind_vs_state(pipe, ctx->base.saved_vs);
    pipe->bind_vertex_elements_state(pipe, ctx->base.saved_velem_state);
 
-   ctx->base.saved_blend_state = INVALID_PTR;
-   ctx->base.saved_dsa_state = INVALID_PTR;
    ctx->base.saved_rs_state = INVALID_PTR;
-   ctx->base.saved_fs = INVALID_PTR;
    ctx->base.saved_vs = INVALID_PTR;
    ctx->base.saved_velem_state = INVALID_PTR;
 
+   /* restore the state objects which are required to be saved for clear/copy
+    */
+   if (ctx->base.saved_blend_state != INVALID_PTR) {
+      pipe->bind_blend_state(pipe, ctx->base.saved_blend_state);
+      ctx->base.saved_blend_state = INVALID_PTR;
+   }
+   if (ctx->base.saved_dsa_state != INVALID_PTR) {
+      pipe->bind_depth_stencil_alpha_state(pipe, ctx->base.saved_dsa_state);
+      ctx->base.saved_dsa_state = INVALID_PTR;
+   }
+   if (ctx->base.saved_fs != INVALID_PTR) {
+      pipe->bind_fs_state(pipe, ctx->base.saved_fs);
+      ctx->base.saved_fs = INVALID_PTR;
+   }
+
    pipe->set_stencil_ref(pipe, &ctx->base.saved_stencil_ref);
    pipe->set_viewport_state(pipe, &ctx->base.saved_viewport);
    pipe->set_clip_state(pipe, &ctx->base.saved_clip);
 
-   /* restore the state objects which are required to be saved before copy/fill
-    */
    if (ctx->base.saved_fb_state.nr_cbufs != ~0) {
       pipe->set_framebuffer_state(pipe, &ctx->base.saved_fb_state);
       util_unreference_framebuffer_state(&ctx->base.saved_fb_state);

From 21c5d11b7ee1f6fd9d16752d8921976d9951623d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Mon, 15 Aug 2011 19:37:33 +0200
Subject: [PATCH 344/600] noop: redirect the get_param/is_format.. queries to
 the underlying driver

---
 src/gallium/drivers/noop/noop_pipe.c | 119 +++------------------------
 1 file changed, 11 insertions(+), 108 deletions(-)

diff --git a/src/gallium/drivers/noop/noop_pipe.c b/src/gallium/drivers/noop/noop_pipe.c
index 18308b796f3..ffc444e37d1 100644
--- a/src/gallium/drivers/noop/noop_pipe.c
+++ b/src/gallium/drivers/noop/noop_pipe.c
@@ -324,131 +324,34 @@ static const char *noop_get_name(struct pipe_screen* pscreen)
 
 static int noop_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 {
-	switch (param) {
-	/* Supported features (boolean caps). */
-	case PIPE_CAP_NPOT_TEXTURES:
-	case PIPE_CAP_TWO_SIDED_STENCIL:
-	case PIPE_CAP_GLSL:
-	case PIPE_CAP_OCCLUSION_QUERY:
-	case PIPE_CAP_POINT_SPRITE:
-	case PIPE_CAP_ANISOTROPIC_FILTER:
-	case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
-	case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
-	case PIPE_CAP_TEXTURE_SHADOW_MAP:
-	case PIPE_CAP_TEXTURE_SWIZZLE:
-	case PIPE_CAP_BLEND_EQUATION_SEPARATE:
-	case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+	struct pipe_screen *screen = ((struct noop_pipe_screen*)pscreen)->oscreen;
 
-	  return 1;
-	case PIPE_CAP_DUAL_SOURCE_BLEND:
-
-	case PIPE_CAP_SM3:
-	case PIPE_CAP_INDEP_BLEND_ENABLE:
-	case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
-	case PIPE_CAP_DEPTH_CLAMP:
-	case PIPE_CAP_SHADER_STENCIL_EXPORT:
-	case PIPE_CAP_TIMER_QUERY:
-	case PIPE_CAP_STREAM_OUTPUT:
-	case PIPE_CAP_PRIMITIVE_RESTART:
-	case PIPE_CAP_INDEP_BLEND_FUNC:
-		return 0;
-
-	/* Texturing. */
-	case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
-	case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
-	case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
-		return 14;
-	case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
-		return 16;
-	case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
-	case PIPE_CAP_MAX_COMBINED_SAMPLERS:
-		return 16;
-
-	/* Render targets. */
-	case PIPE_CAP_MAX_RENDER_TARGETS:
-		return 8;
-
-	/* Fragment coordinate conventions. */
-	case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
-	case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
-		return 1;
-	case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
-	case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
-		return 0;
-
-	default:
-		return 0;
-	}
+	return screen->get_param(screen, param);
 }
 
 static float noop_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param)
 {
-	switch (param) {
-	case PIPE_CAP_MAX_LINE_WIDTH:
-	case PIPE_CAP_MAX_LINE_WIDTH_AA:
-	case PIPE_CAP_MAX_POINT_WIDTH:
-	case PIPE_CAP_MAX_POINT_WIDTH_AA:
-		return 8192.0f;
-	case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
-		return 16.0f;
-	case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
-		return 16.0f;
-	default:
-		return 0.0f;
-	}
+	struct pipe_screen *screen = ((struct noop_pipe_screen*)pscreen)->oscreen;
+
+	return screen->get_paramf(screen, param);
 }
 
 static int noop_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param)
 {
-	switch(shader)
-	{
-	case PIPE_SHADER_FRAGMENT:
-	case PIPE_SHADER_VERTEX:
-	case PIPE_SHADER_GEOMETRY:
-		break;
-	default:
-		return 0;
-	}
+	struct pipe_screen *screen = ((struct noop_pipe_screen*)pscreen)->oscreen;
 
-	switch (param) {
-	case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
-	case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
-	case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
-	case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
-		return 16384;
-	case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
-		return 8;
-	case PIPE_SHADER_CAP_MAX_INPUTS:
-		return 16;
-	case PIPE_SHADER_CAP_MAX_TEMPS:
-		return 256;
-	case PIPE_SHADER_CAP_MAX_ADDRS:
-		return 1;
-	case PIPE_SHADER_CAP_MAX_CONSTS:
-		return 256;
-	case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
-		return 1;
-	case PIPE_SHADER_CAP_MAX_PREDS:
-		return 0;
-	case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
-		return 1;
-	case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
-	case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
-	case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
-	case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
-		return 1;
-	default:
-		return 0;
-	}
+	return screen->get_shader_param(screen, shader, param);
 }
 
-static boolean noop_is_format_supported(struct pipe_screen* screen,
+static boolean noop_is_format_supported(struct pipe_screen* pscreen,
 					enum pipe_format format,
 					enum pipe_texture_target target,
 					unsigned sample_count,
                                         unsigned usage)
 {
-	return true;
+	struct pipe_screen *screen = ((struct noop_pipe_screen*)pscreen)->oscreen;
+
+	return screen->is_format_supported(screen, format, target, sample_count, usage);
 }
 
 static void noop_destroy_screen(struct pipe_screen *screen)

From e3be51311834217cf35be9186e7dc9f57a10d44b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Mon, 15 Aug 2011 23:37:44 +0200
Subject: [PATCH 345/600] r600g: expose ARB_ES2_compatibility by claiming
 fixed-point format support

I also needed to make some changes in u_vbuf_mgr in order to override
the caps from the driver and enable the fallback even though the driver
claims the format is supported.
---
 src/gallium/auxiliary/util/u_vbuf_mgr.c | 46 ++++++++-----------------
 src/gallium/auxiliary/util/u_vbuf_mgr.h | 17 +++++++++
 src/gallium/drivers/r600/r600_formats.h |  1 -
 src/gallium/drivers/r600/r600_pipe.c    |  1 +
 4 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.c b/src/gallium/auxiliary/util/u_vbuf_mgr.c
index 19eb689cfb2..d9b39e528bb 100644
--- a/src/gallium/auxiliary/util/u_vbuf_mgr.c
+++ b/src/gallium/auxiliary/util/u_vbuf_mgr.c
@@ -34,21 +34,6 @@
 #include "translate/translate.h"
 #include "translate/translate_cache.h"
 
-/* Hardware vertex fetcher limitations can be described by this structure. */
-struct u_vbuf_caps {
-   /* Vertex format CAPs. */
-   /* TRUE if hardware supports it. */
-   unsigned format_fixed32:1;    /* PIPE_FORMAT_*32*_FIXED */
-   unsigned format_float16:1;    /* PIPE_FORMAT_*16*_FLOAT */
-   unsigned format_float64:1;    /* PIPE_FORMAT_*64*_FLOAT */
-   unsigned format_norm32:1;     /* PIPE_FORMAT_*32*NORM */
-   unsigned format_scaled32:1;   /* PIPE_FORMAT_*32*SCALED */
-
-   /* Whether vertex fetches don't have to be dword-aligned. */
-   /* TRUE if hardware supports it. */
-   unsigned fetch_dword_unaligned:1;
-};
-
 struct u_vbuf_mgr_elements {
    unsigned count;
    struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
@@ -69,7 +54,6 @@ struct u_vbuf_mgr_elements {
 
 struct u_vbuf_mgr_priv {
    struct u_vbuf_mgr b;
-   struct u_vbuf_caps caps;
    struct pipe_context *pipe;
 
    struct translate_cache *translate_cache;
@@ -89,25 +73,25 @@ static void u_vbuf_mgr_init_format_caps(struct u_vbuf_mgr_priv *mgr)
 {
    struct pipe_screen *screen = mgr->pipe->screen;
 
-   mgr->caps.format_fixed32 =
+   mgr->b.caps.format_fixed32 =
       screen->is_format_supported(screen, PIPE_FORMAT_R32_FIXED, PIPE_BUFFER,
                                   0, PIPE_BIND_VERTEX_BUFFER);
 
-   mgr->caps.format_float16 =
+   mgr->b.caps.format_float16 =
       screen->is_format_supported(screen, PIPE_FORMAT_R16_FLOAT, PIPE_BUFFER,
                                   0, PIPE_BIND_VERTEX_BUFFER);
 
-   mgr->caps.format_float64 =
+   mgr->b.caps.format_float64 =
       screen->is_format_supported(screen, PIPE_FORMAT_R64_FLOAT, PIPE_BUFFER,
                                   0, PIPE_BIND_VERTEX_BUFFER);
 
-   mgr->caps.format_norm32 =
+   mgr->b.caps.format_norm32 =
       screen->is_format_supported(screen, PIPE_FORMAT_R32_UNORM, PIPE_BUFFER,
                                   0, PIPE_BIND_VERTEX_BUFFER) &&
       screen->is_format_supported(screen, PIPE_FORMAT_R32_SNORM, PIPE_BUFFER,
                                   0, PIPE_BIND_VERTEX_BUFFER);
 
-   mgr->caps.format_scaled32 =
+   mgr->b.caps.format_scaled32 =
       screen->is_format_supported(screen, PIPE_FORMAT_R32_USCALED, PIPE_BUFFER,
                                   0, PIPE_BIND_VERTEX_BUFFER) &&
       screen->is_format_supported(screen, PIPE_FORMAT_R32_SSCALED, PIPE_BUFFER,
@@ -130,7 +114,7 @@ u_vbuf_mgr_create(struct pipe_context *pipe,
                                      upload_buffer_alignment,
                                      upload_buffer_bind);
 
-   mgr->caps.fetch_dword_unaligned =
+   mgr->b.caps.fetch_dword_unaligned =
          fetch_alignment == U_VERTEX_FETCH_BYTE_ALIGNED;
 
    u_vbuf_mgr_init_format_caps(mgr);
@@ -184,7 +168,7 @@ u_vbuf_translate_begin(struct u_vbuf_mgr_priv *mgr,
 
       /* Check for support. */
       if (mgr->ve->ve[i].src_format == mgr->ve->native_format[i] &&
-          (mgr->caps.fetch_dword_unaligned ||
+          (mgr->b.caps.fetch_dword_unaligned ||
            (vb->buffer_offset % 4 == 0 &&
             vb->stride % 4 == 0 &&
             mgr->ve->ve[i].src_offset % 4 == 0))) {
@@ -365,7 +349,7 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb,
       /* Choose a native format.
        * For now we don't care about the alignment, that's going to
        * be sorted out later. */
-      if (!mgr->caps.format_fixed32) {
+      if (!mgr->b.caps.format_fixed32) {
          switch (format) {
             FORMAT_REPLACE(R32_FIXED,           R32_FLOAT);
             FORMAT_REPLACE(R32G32_FIXED,        R32G32_FLOAT);
@@ -374,7 +358,7 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb,
             default:;
          }
       }
-      if (!mgr->caps.format_float16) {
+      if (!mgr->b.caps.format_float16) {
          switch (format) {
             FORMAT_REPLACE(R16_FLOAT,           R32_FLOAT);
             FORMAT_REPLACE(R16G16_FLOAT,        R32G32_FLOAT);
@@ -383,7 +367,7 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb,
             default:;
          }
       }
-      if (!mgr->caps.format_float64) {
+      if (!mgr->b.caps.format_float64) {
          switch (format) {
             FORMAT_REPLACE(R64_FLOAT,           R32_FLOAT);
             FORMAT_REPLACE(R64G64_FLOAT,        R32G32_FLOAT);
@@ -392,7 +376,7 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb,
             default:;
          }
       }
-      if (!mgr->caps.format_norm32) {
+      if (!mgr->b.caps.format_norm32) {
          switch (format) {
             FORMAT_REPLACE(R32_UNORM,           R32_FLOAT);
             FORMAT_REPLACE(R32G32_UNORM,        R32G32_FLOAT);
@@ -405,7 +389,7 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb,
             default:;
          }
       }
-      if (!mgr->caps.format_scaled32) {
+      if (!mgr->b.caps.format_scaled32) {
          switch (format) {
             FORMAT_REPLACE(R32_USCALED,         R32_FLOAT);
             FORMAT_REPLACE(R32G32_USCALED,      R32G32_FLOAT);
@@ -427,11 +411,11 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb,
       ve->incompatible_layout =
             ve->incompatible_layout ||
             ve->ve[i].src_format != ve->native_format[i] ||
-            (!mgr->caps.fetch_dword_unaligned && ve->ve[i].src_offset % 4 != 0);
+            (!mgr->b.caps.fetch_dword_unaligned && ve->ve[i].src_offset % 4 != 0);
    }
 
    /* Align the formats to the size of DWORD if needed. */
-   if (!mgr->caps.fetch_dword_unaligned) {
+   if (!mgr->b.caps.fetch_dword_unaligned) {
       for (i = 0; i < count; i++) {
          ve->native_format_size[i] = align(ve->native_format_size[i], 4);
       }
@@ -472,7 +456,7 @@ void u_vbuf_mgr_set_vertex_buffers(struct u_vbuf_mgr *mgrb,
    mgr->any_user_vbs = FALSE;
    mgr->incompatible_vb_layout = FALSE;
 
-   if (!mgr->caps.fetch_dword_unaligned) {
+   if (!mgr->b.caps.fetch_dword_unaligned) {
       /* Check if the strides and offsets are aligned to the size of DWORD. */
       for (i = 0; i < count; i++) {
          if (bufs[i].buffer) {
diff --git a/src/gallium/auxiliary/util/u_vbuf_mgr.h b/src/gallium/auxiliary/util/u_vbuf_mgr.h
index 4e6372435d8..c653ca4346d 100644
--- a/src/gallium/auxiliary/util/u_vbuf_mgr.h
+++ b/src/gallium/auxiliary/util/u_vbuf_mgr.h
@@ -37,6 +37,21 @@
 #include "pipe/p_state.h"
 #include "util/u_transfer.h"
 
+/* Hardware vertex fetcher limitations can be described by this structure. */
+struct u_vbuf_caps {
+   /* Vertex format CAPs. */
+   /* TRUE if hardware supports it. */
+   unsigned format_fixed32:1;    /* PIPE_FORMAT_*32*_FIXED */
+   unsigned format_float16:1;    /* PIPE_FORMAT_*16*_FLOAT */
+   unsigned format_float64:1;    /* PIPE_FORMAT_*64*_FLOAT */
+   unsigned format_norm32:1;     /* PIPE_FORMAT_*32*NORM */
+   unsigned format_scaled32:1;   /* PIPE_FORMAT_*32*SCALED */
+
+   /* Whether vertex fetches don't have to be dword-aligned. */
+   /* TRUE if hardware supports it. */
+   unsigned fetch_dword_unaligned:1;
+};
+
 /* The manager.
  * This structure should also be used to access vertex buffers
  * from a driver. */
@@ -63,6 +78,8 @@ struct u_vbuf_mgr {
     * - u_upload_buffer
     * - u_upload_flush */
    struct u_upload_mgr *uploader;
+
+   struct u_vbuf_caps caps;
 };
 
 struct u_vbuf_resource {
diff --git a/src/gallium/drivers/r600/r600_formats.h b/src/gallium/drivers/r600/r600_formats.h
index 1c1089d89d2..b822cba9293 100644
--- a/src/gallium/drivers/r600/r600_formats.h
+++ b/src/gallium/drivers/r600/r600_formats.h
@@ -99,7 +99,6 @@ static INLINE bool r600_is_vertex_format_supported(enum pipe_format format)
 
 	/* No fixed, no double. */
 	if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
-	    desc->channel[i].type == UTIL_FORMAT_TYPE_FIXED ||
 	    (desc->channel[i].size == 64 &&
 	     desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT))
 		return false;
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 5d09d59e111..4cf02c9b18e 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -273,6 +273,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
 		r600_destroy_context(&rctx->context);
 		return NULL;
 	}
+	rctx->vbuf_mgr->caps.format_fixed32 = 0;
 
 	rctx->blitter = util_blitter_create(&rctx->context);
 	if (rctx->blitter == NULL) {

From f272117def7a7962f355581d038af6d0981a23cb Mon Sep 17 00:00:00 2001
From: Cooper Yuan <cooperyuan@gmail.com>
Date: Tue, 16 Aug 2011 20:37:13 +0800
Subject: [PATCH 346/600] dri2: check if context is valid before flushing the
 pipe

---
 src/gallium/state_trackers/dri/drm/dri2.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c
index 908a735234e..53638da9888 100644
--- a/src/gallium/state_trackers/dri/drm/dri2.c
+++ b/src/gallium/state_trackers/dri/drm/dri2.c
@@ -49,7 +49,8 @@ dri2_flush_drawable(__DRIdrawable *draw)
    struct dri_drawable *drawable = dri_drawable(draw);
    struct dri_context *ctx = dri_get_current(draw->driScreenPriv);
 
-   ctx->st->flush(ctx->st, 0, NULL);
+   if (ctx)
+      ctx->st->flush(ctx->st, 0, NULL);
 }
 
 static void

From 61d2dfbe488cf5de5881c20fe1ead97f2ab5dabb Mon Sep 17 00:00:00 2001
From: Benjamin Franzke <benjaminfranzke@googlemail.com>
Date: Tue, 16 Aug 2011 19:23:18 +0200
Subject: [PATCH 347/600] egl: Add include paths for platform autodetection

Needed since commit 85fe9484.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=40145
---
 src/egl/main/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/egl/main/Makefile b/src/egl/main/Makefile
index c100fbfeb9c..3172ad2ec03 100644
--- a/src/egl/main/Makefile
+++ b/src/egl/main/Makefile
@@ -63,6 +63,7 @@ EGL_LIB_DEPS += $(XCB_DRI2_LIBS)
 endif
 ifneq ($(findstring drm, $(EGL_PLATFORMS)),)
 EGL_LIB_DEPS += -lgbm
+INCLUDE_DIRS += -I$(TOP)/src/gbm/main
 endif
 EGL_LIB_DEPS += $(LIBUDEV_LIBS) $(DLOPEN_LIBS) $(LIBDRM_LIB) $(WAYLAND_LIBS)
 endif
@@ -70,6 +71,7 @@ endif
 
 ifneq ($(findstring wayland, $(EGL_PLATFORMS)),)
 LOCAL_LIBS += $(TOP)/src/egl/wayland/wayland-drm/libwayland-drm.a
+INCLUDE_DIRS += $(WAYLAND_CFLAGS)
 endif
 
 ifeq ($(filter glx, $(EGL_DRIVERS_DIRS)),glx)

From 4a47662beaa2092447939db7880531fb706afedd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Tue, 16 Aug 2011 19:35:10 +0200
Subject: [PATCH 348/600] r600g: rename bc -> bytecode

It took me a while to figure out what it stands for.
---
 src/gallium/drivers/r600/eg_asm.c      |   2 +-
 src/gallium/drivers/r600/r600_asm.c    | 268 +++++------
 src/gallium/drivers/r600/r600_asm.h    |  68 +--
 src/gallium/drivers/r600/r600_shader.c | 624 ++++++++++++-------------
 src/gallium/drivers/r600/r600_shader.h |   2 +-
 src/gallium/drivers/r600/r700_asm.c    |   4 +-
 6 files changed, 484 insertions(+), 484 deletions(-)

diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index c95872b0809..ca25b341ffd 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -29,7 +29,7 @@
 #include "r600_opcodes.h"
 #include "evergreend.h"
 
-int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
+int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
 {
 	unsigned id = cf->id;
 
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 6092432e6f2..0311b562f27 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -36,7 +36,7 @@
 #define NUM_OF_CYCLES 3
 #define NUM_OF_COMPONENTS 4
 
-static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r600_bc_alu *alu)
+static inline unsigned int r600_bytecode_get_num_operands(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
 {
 	if(alu->is_op3)
 		return 3;
@@ -152,11 +152,11 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r
 	return 3;
 }
 
-int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id);
+int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id);
 
-static struct r600_bc_cf *r600_bc_cf(void)
+static struct r600_bytecode_cf *r600_bytecode_cf(void)
 {
-	struct r600_bc_cf *cf = CALLOC_STRUCT(r600_bc_cf);
+	struct r600_bytecode_cf *cf = CALLOC_STRUCT(r600_bytecode_cf);
 
 	if (cf == NULL)
 		return NULL;
@@ -167,9 +167,9 @@ static struct r600_bc_cf *r600_bc_cf(void)
 	return cf;
 }
 
-static struct r600_bc_alu *r600_bc_alu(void)
+static struct r600_bytecode_alu *r600_bytecode_alu(void)
 {
-	struct r600_bc_alu *alu = CALLOC_STRUCT(r600_bc_alu);
+	struct r600_bytecode_alu *alu = CALLOC_STRUCT(r600_bytecode_alu);
 
 	if (alu == NULL)
 		return NULL;
@@ -177,9 +177,9 @@ static struct r600_bc_alu *r600_bc_alu(void)
 	return alu;
 }
 
-static struct r600_bc_vtx *r600_bc_vtx(void)
+static struct r600_bytecode_vtx *r600_bytecode_vtx(void)
 {
-	struct r600_bc_vtx *vtx = CALLOC_STRUCT(r600_bc_vtx);
+	struct r600_bytecode_vtx *vtx = CALLOC_STRUCT(r600_bytecode_vtx);
 
 	if (vtx == NULL)
 		return NULL;
@@ -187,9 +187,9 @@ static struct r600_bc_vtx *r600_bc_vtx(void)
 	return vtx;
 }
 
-static struct r600_bc_tex *r600_bc_tex(void)
+static struct r600_bytecode_tex *r600_bytecode_tex(void)
 {
-	struct r600_bc_tex *tex = CALLOC_STRUCT(r600_bc_tex);
+	struct r600_bytecode_tex *tex = CALLOC_STRUCT(r600_bytecode_tex);
 
 	if (tex == NULL)
 		return NULL;
@@ -197,15 +197,15 @@ static struct r600_bc_tex *r600_bc_tex(void)
 	return tex;
 }
 
-void r600_bc_init(struct r600_bc *bc, enum chip_class chip_class)
+void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class)
 {
 	LIST_INITHEAD(&bc->cf);
 	bc->chip_class = chip_class;
 }
 
-static int r600_bc_add_cf(struct r600_bc *bc)
+static int r600_bytecode_add_cf(struct r600_bytecode *bc)
 {
-	struct r600_bc_cf *cf = r600_bc_cf();
+	struct r600_bytecode_cf *cf = r600_bytecode_cf();
 
 	if (cf == NULL)
 		return -ENOMEM;
@@ -219,7 +219,7 @@ static int r600_bc_add_cf(struct r600_bc *bc)
 	return 0;
 }
 
-int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
+int r600_bytecode_add_output(struct r600_bytecode *bc, const struct r600_bytecode_output *output)
 {
 	int r;
 
@@ -254,16 +254,16 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
 		}
 	}
 
-	r = r600_bc_add_cf(bc);
+	r = r600_bytecode_add_cf(bc);
 	if (r)
 		return r;
 	bc->cf_last->inst = output->inst;
-	memcpy(&bc->cf_last->output, output, sizeof(struct r600_bc_output));
+	memcpy(&bc->cf_last->output, output, sizeof(struct r600_bytecode_output));
 	return 0;
 }
 
 /* alu instructions that can ony exits once per group */
-static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+static int is_alu_once_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
 {
 	switch (bc->chip_class) {
 	case R600:
@@ -344,7 +344,7 @@ static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
 	}
 }
 
-static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+static int is_alu_reduction_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
 {
 	switch (bc->chip_class) {
 	case R600:
@@ -365,7 +365,7 @@ static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
 	}
 }
 
-static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+static int is_alu_cube_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
 {
 	switch (bc->chip_class) {
 	case R600:
@@ -380,7 +380,7 @@ static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
 	}
 }
 
-static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+static int is_alu_mova_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
 {
 	switch (bc->chip_class) {
 	case R600:
@@ -398,7 +398,7 @@ static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
 }
 
 /* alu instructions that can only execute on the vector unit */
-static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+static int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
 {
 	return is_alu_reduction_inst(bc, alu) ||
 		is_alu_mova_inst(bc, alu) ||
@@ -407,7 +407,7 @@ static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
 }
 
 /* alu instructions that can only execute on the trans unit */
-static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+static int is_alu_trans_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
 {
 	switch (bc->chip_class) {
 	case R600:
@@ -478,23 +478,23 @@ static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
 }
 
 /* alu instructions that can execute on any unit */
-static int is_alu_any_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+static int is_alu_any_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
 {
 	return !is_alu_vec_unit_inst(bc, alu) &&
 		!is_alu_trans_unit_inst(bc, alu);
 }
 
-static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first,
-			    struct r600_bc_alu *assignment[5])
+static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *alu_first,
+			    struct r600_bytecode_alu *assignment[5])
 {
-	struct r600_bc_alu *alu;
+	struct r600_bytecode_alu *alu;
 	unsigned i, chan, trans;
 	int max_slots = bc->chip_class == CAYMAN ? 4 : 5;
 
 	for (i = 0; i < max_slots; i++)
 		assignment[i] = NULL;
 
-	for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) {
+	for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bytecode_alu, alu->list.next, list)) {
 		chan = alu->dst.chan;
 		if (max_slots == 4)
 			trans = 0;
@@ -573,7 +573,7 @@ static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan,
 	return 0;
 }
 
-static int reserve_cfile(struct r600_bc *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
+static int reserve_cfile(struct r600_bytecode *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
 {
 	int res, num_res = 4;
 	if (bc->chip_class >= R700) {
@@ -615,12 +615,12 @@ static int is_const(int sel)
 		sel <= V_SQ_ALU_SRC_LITERAL);
 }
 
-static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu,
+static int check_vector(struct r600_bytecode *bc, struct r600_bytecode_alu *alu,
 			struct alu_bank_swizzle *bs, int bank_swizzle)
 {
 	int r, src, num_src, sel, elem, cycle;
 
-	num_src = r600_bc_get_num_operands(bc, alu);
+	num_src = r600_bytecode_get_num_operands(bc, alu);
 	for (src = 0; src < num_src; src++) {
 		sel = alu->src[src].sel;
 		elem = alu->src[src].chan;
@@ -645,12 +645,12 @@ static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu,
 	return 0;
 }
 
-static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu,
+static int check_scalar(struct r600_bytecode *bc, struct r600_bytecode_alu *alu,
 			struct alu_bank_swizzle *bs, int bank_swizzle)
 {
 	int r, src, num_src, const_count, sel, elem, cycle;
 
-	num_src = r600_bc_get_num_operands(bc, alu);
+	num_src = r600_bytecode_get_num_operands(bc, alu);
 	for (const_count = 0, src = 0; src < num_src; ++src) {
 		sel = alu->src[src].sel;
 		elem = alu->src[src].chan;
@@ -691,8 +691,8 @@ static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu,
 	return 0;
 }
 
-static int check_and_set_bank_swizzle(struct r600_bc *bc,
-				      struct r600_bc_alu *slots[5])
+static int check_and_set_bank_swizzle(struct r600_bytecode *bc,
+				      struct r600_bytecode_alu *slots[5])
 {
 	struct alu_bank_swizzle bs;
 	int bank_swizzle[5];
@@ -764,10 +764,10 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc,
 	return -1;
 }
 
-static int replace_gpr_with_pv_ps(struct r600_bc *bc,
-				  struct r600_bc_alu *slots[5], struct r600_bc_alu *alu_prev)
+static int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
+				  struct r600_bytecode_alu *slots[5], struct r600_bytecode_alu *alu_prev)
 {
-	struct r600_bc_alu *prev[5];
+	struct r600_bytecode_alu *prev[5];
 	int gpr[5], chan[5];
 	int i, j, r, src, num_src;
 	int max_slots = bc->chip_class == CAYMAN ? 4 : 5;
@@ -789,11 +789,11 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc,
 	}
 
 	for (i = 0; i < max_slots; ++i) {
-		struct r600_bc_alu *alu = slots[i];
+		struct r600_bytecode_alu *alu = slots[i];
 		if(!alu)
 			continue;
 
-		num_src = r600_bc_get_num_operands(bc, alu);
+		num_src = r600_bytecode_get_num_operands(bc, alu);
 		for (src = 0; src < num_src; ++src) {
 			if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
 				continue;
@@ -821,7 +821,7 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc,
 	return 0;
 }
 
-void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg)
+void r600_bytecode_special_constants(u32 value, unsigned *sel, unsigned *neg)
 {
 	switch(value) {
 	case 0:
@@ -854,10 +854,10 @@ void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg)
 }
 
 /* compute how many literal are needed */
-static int r600_bc_alu_nliterals(struct r600_bc *bc, struct r600_bc_alu *alu,
+static int r600_bytecode_alu_nliterals(struct r600_bytecode *bc, struct r600_bytecode_alu *alu,
 				 uint32_t literal[4], unsigned *nliteral)
 {
-	unsigned num_src = r600_bc_get_num_operands(bc, alu);
+	unsigned num_src = r600_bytecode_get_num_operands(bc, alu);
 	unsigned i, j;
 
 	for (i = 0; i < num_src; ++i) {
@@ -880,11 +880,11 @@ static int r600_bc_alu_nliterals(struct r600_bc *bc, struct r600_bc_alu *alu,
 	return 0;
 }
 
-static void r600_bc_alu_adjust_literals(struct r600_bc *bc,
-					struct r600_bc_alu *alu,
+static void r600_bytecode_alu_adjust_literals(struct r600_bytecode *bc,
+					struct r600_bytecode_alu *alu,
 					uint32_t literal[4], unsigned nliteral)
 {
-	unsigned num_src = r600_bc_get_num_operands(bc, alu);
+	unsigned num_src = r600_bytecode_get_num_operands(bc, alu);
 	unsigned i, j;
 
 	for (i = 0; i < num_src; ++i) {
@@ -900,11 +900,11 @@ static void r600_bc_alu_adjust_literals(struct r600_bc *bc,
 	}
 }
 
-static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
-			     struct r600_bc_alu *alu_prev)
+static int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu *slots[5],
+			     struct r600_bytecode_alu *alu_prev)
 {
-	struct r600_bc_alu *prev[5];
-	struct r600_bc_alu *result[5] = { NULL };
+	struct r600_bytecode_alu *prev[5];
+	struct r600_bytecode_alu *result[5] = { NULL };
 
 	uint32_t literal[4], prev_literal[4];
 	unsigned nliteral = 0, prev_nliteral = 0;
@@ -919,13 +919,13 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
 		return r;
 
 	for (i = 0; i < max_slots; ++i) {
-		struct r600_bc_alu *alu;
+		struct r600_bytecode_alu *alu;
 
 		/* check number of literals */
 		if (prev[i]) {
-			if (r600_bc_alu_nliterals(bc, prev[i], literal, &nliteral))
+			if (r600_bytecode_alu_nliterals(bc, prev[i], literal, &nliteral))
 				return 0;
-			if (r600_bc_alu_nliterals(bc, prev[i], prev_literal, &prev_nliteral))
+			if (r600_bytecode_alu_nliterals(bc, prev[i], prev_literal, &prev_nliteral))
 				return 0;
 			if (is_alu_mova_inst(bc, prev[i])) {
 				if (have_rel)
@@ -934,7 +934,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
 			}
 			num_once_inst += is_alu_once_inst(bc, prev[i]);
 		}
-		if (slots[i] && r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral))
+		if (slots[i] && r600_bytecode_alu_nliterals(bc, slots[i], literal, &nliteral))
 			return 0;
 
 		/* Let's check used slots. */
@@ -970,7 +970,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
 		}
 
 		/* Let's check source gprs */
-		num_src = r600_bc_get_num_operands(bc, alu);
+		num_src = r600_bytecode_get_num_operands(bc, alu);
 		for (src = 0; src < num_src; ++src) {
 			if (alu->src[src].rel) {
 				if (have_mova)
@@ -1020,7 +1020,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
 	}
 
 	/* determine new last instruction */
-	LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list)->last = 1;
+	LIST_ENTRY(struct r600_bytecode_alu, bc->cf_last->alu.prev, list)->last = 1;
 
 	/* determine new first instruction */
 	for (i = 0; i < max_slots; ++i) {
@@ -1040,9 +1040,9 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
  * probably do slightly better by recognizing that we actually have two
  * consecutive lines of 16 constants, but the resulting code would also be
  * somewhat more complicated. */
-static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *alu, int type)
+static int r600_bytecode_alloc_kcache_lines(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, int type)
 {
-	struct r600_bc_kcache *kcache = bc->cf_last->kcache;
+	struct r600_bytecode_kcache *kcache = bc->cf_last->kcache;
 	unsigned int required_lines;
 	unsigned int free_lines = 0;
 	unsigned int cache_line[3];
@@ -1095,7 +1095,7 @@ static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *al
 
 	/* Start a new ALU clause if needed. */
 	if (required_lines > free_lines) {
-		if ((r = r600_bc_add_cf(bc))) {
+		if ((r = r600_bytecode_add_cf(bc))) {
 			return r;
 		}
 		bc->cf_last->inst = (type << 3);
@@ -1150,15 +1150,15 @@ static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *al
 	return 0;
 }
 
-int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type)
+int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, int type)
 {
-	struct r600_bc_alu *nalu = r600_bc_alu();
-	struct r600_bc_alu *lalu;
+	struct r600_bytecode_alu *nalu = r600_bytecode_alu();
+	struct r600_bytecode_alu *lalu;
 	int i, r;
 
 	if (nalu == NULL)
 		return -ENOMEM;
-	memcpy(nalu, alu, sizeof(struct r600_bc_alu));
+	memcpy(nalu, alu, sizeof(struct r600_bytecode_alu));
 
 	if (bc->cf_last != NULL && bc->cf_last->inst != (type << 3)) {
 		/* check if we could add it anyway */
@@ -1176,7 +1176,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 
 	/* cf can contains only alu or only vtx or only tex */
 	if (bc->cf_last == NULL || bc->force_add_cf) {
-		r = r600_bc_add_cf(bc);
+		r = r600_bytecode_add_cf(bc);
 		if (r) {
 			free(nalu);
 			return r;
@@ -1186,7 +1186,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 
 	/* Setup the kcache for this ALU instruction. This will start a new
 	 * ALU clause if needed. */
-	if ((r = r600_bc_alloc_kcache_lines(bc, nalu, type))) {
+	if ((r = r600_bytecode_alloc_kcache_lines(bc, nalu, type))) {
 		free(nalu);
 		return r;
 	}
@@ -1200,7 +1200,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 			bc->ngpr = nalu->src[i].sel + 1;
 		}
 		if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
-			r600_bc_special_constants(nalu->src[i].value,
+			r600_bytecode_special_constants(nalu->src[i].value,
 				&nalu->src[i].sel, &nalu->src[i].neg);
 	}
 	if (nalu->dst.sel >= bc->ngpr) {
@@ -1215,7 +1215,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 	if (nalu->last) {
 		uint32_t literal[4];
 		unsigned nliteral;
-		struct r600_bc_alu *slots[5];
+		struct r600_bytecode_alu *slots[5];
 		int max_slots = bc->chip_class == CAYMAN ? 4 : 5;
 		r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots);
 		if (r)
@@ -1239,7 +1239,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 
 		for (i = 0, nliteral = 0; i < max_slots; i++) {
 			if (slots[i]) {
-				r = r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral);
+				r = r600_bytecode_alu_nliterals(bc, slots[i], literal, &nliteral);
 				if (r)
 					return r;
 			}
@@ -1259,12 +1259,12 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 	return 0;
 }
 
-int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
+int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu)
 {
-	return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
+	return r600_bytecode_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
 }
 
-static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc)
+static unsigned r600_bytecode_num_tex_and_vtx_instructions(const struct r600_bytecode *bc)
 {
 	switch (bc->chip_class) {
 	case R600:
@@ -1283,7 +1283,7 @@ static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc)
 	}
 }
 
-static inline boolean last_inst_was_vtx_fetch(struct r600_bc *bc)
+static inline boolean last_inst_was_vtx_fetch(struct r600_bytecode *bc)
 {
 	if (bc->chip_class == CAYMAN) {
 		if (bc->cf_last->inst != CM_V_SQ_CF_WORD1_SQ_CF_INST_TC)
@@ -1296,20 +1296,20 @@ static inline boolean last_inst_was_vtx_fetch(struct r600_bc *bc)
 	return FALSE;
 }
 
-int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
+int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx)
 {
-	struct r600_bc_vtx *nvtx = r600_bc_vtx();
+	struct r600_bytecode_vtx *nvtx = r600_bytecode_vtx();
 	int r;
 
 	if (nvtx == NULL)
 		return -ENOMEM;
-	memcpy(nvtx, vtx, sizeof(struct r600_bc_vtx));
+	memcpy(nvtx, vtx, sizeof(struct r600_bytecode_vtx));
 
 	/* cf can contains only alu or only vtx or only tex */
 	if (bc->cf_last == NULL ||
 	    last_inst_was_vtx_fetch(bc) ||
 	    bc->force_add_cf) {
-		r = r600_bc_add_cf(bc);
+		r = r600_bytecode_add_cf(bc);
 		if (r) {
 			free(nvtx);
 			return r;
@@ -1323,24 +1323,24 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
 	/* each fetch use 4 dwords */
 	bc->cf_last->ndw += 4;
 	bc->ndw += 4;
-	if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc))
+	if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc))
 		bc->force_add_cf = 1;
 	return 0;
 }
 
-int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex)
+int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex)
 {
-	struct r600_bc_tex *ntex = r600_bc_tex();
+	struct r600_bytecode_tex *ntex = r600_bytecode_tex();
 	int r;
 
 	if (ntex == NULL)
 		return -ENOMEM;
-	memcpy(ntex, tex, sizeof(struct r600_bc_tex));
+	memcpy(ntex, tex, sizeof(struct r600_bytecode_tex));
 
 	/* we can't fetch data und use it as texture lookup address in the same TEX clause */
 	if (bc->cf_last != NULL &&
 		bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
-		struct r600_bc_tex *ttex;
+		struct r600_bytecode_tex *ttex;
 		LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) {
 			if (ttex->dst_gpr == ntex->src_gpr) {
 				bc->force_add_cf = 1;
@@ -1356,7 +1356,7 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex)
 	if (bc->cf_last == NULL ||
 		bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX ||
 	        bc->force_add_cf) {
-		r = r600_bc_add_cf(bc);
+		r = r600_bytecode_add_cf(bc);
 		if (r) {
 			free(ntex);
 			return r;
@@ -1373,15 +1373,15 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex)
 	/* each texture fetch use 4 dwords */
 	bc->cf_last->ndw += 4;
 	bc->ndw += 4;
-	if ((bc->cf_last->ndw / 4) >= r600_bc_num_tex_and_vtx_instructions(bc))
+	if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc))
 		bc->force_add_cf = 1;
 	return 0;
 }
 
-int r600_bc_add_cfinst(struct r600_bc *bc, int inst)
+int r600_bytecode_add_cfinst(struct r600_bytecode *bc, int inst)
 {
 	int r;
-	r = r600_bc_add_cf(bc);
+	r = r600_bytecode_add_cf(bc);
 	if (r)
 		return r;
 
@@ -1390,13 +1390,13 @@ int r600_bc_add_cfinst(struct r600_bc *bc, int inst)
 	return 0;
 }
 
-int cm_bc_add_cf_end(struct r600_bc *bc)
+int cm_bytecode_add_cf_end(struct r600_bytecode *bc)
 {
-	return r600_bc_add_cfinst(bc, CM_V_SQ_CF_WORD1_SQ_CF_INST_END);
+	return r600_bytecode_add_cfinst(bc, CM_V_SQ_CF_WORD1_SQ_CF_INST_END);
 }
 
 /* common to all 3 families */
-static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id)
+static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *vtx, unsigned id)
 {
 	bc->bytecode[id] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
 			S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) |
@@ -1425,7 +1425,7 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign
 }
 
 /* common to all 3 families */
-static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsigned id)
+static int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id)
 {
 	bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) |
 				S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) |
@@ -1455,7 +1455,7 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign
 }
 
 /* r600 only, r700/eg bits in r700_asm.c */
-static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
+static int r600_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id)
 {
 	/* don't replace gpr by pv or ps for destination register */
 	bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
@@ -1496,7 +1496,7 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign
 	return 0;
 }
 
-static void r600_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf)
+static void r600_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf)
 {
 	*bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
 	*bytecode++ = S_SQ_CF_WORD1_CF_INST(cf->inst) |
@@ -1505,7 +1505,7 @@ static void r600_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf
 }
 
 /* common for r600/r700 - eg in eg_asm.c */
-static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
+static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
 {
 	unsigned id = cf->id;
 
@@ -1531,9 +1531,9 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 	case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
 	case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
 		if (bc->chip_class == R700)
-			r700_bc_cf_vtx_build(&bc->bytecode[id], cf);
+			r700_bytecode_cf_vtx_build(&bc->bytecode[id], cf);
 		else
-			r600_bc_cf_vtx_build(&bc->bytecode[id], cf);
+			r600_bytecode_cf_vtx_build(&bc->bytecode[id], cf);
 		break;
 	case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
 	case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
@@ -1573,12 +1573,12 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 	return 0;
 }
 
-int r600_bc_build(struct r600_bc *bc)
+int r600_bytecode_build(struct r600_bytecode *bc)
 {
-	struct r600_bc_cf *cf;
-	struct r600_bc_alu *alu;
-	struct r600_bc_vtx *vtx;
-	struct r600_bc_tex *tex;
+	struct r600_bytecode_cf *cf;
+	struct r600_bytecode_alu *alu;
+	struct r600_bytecode_vtx *vtx;
+	struct r600_bytecode_tex *tex;
 	uint32_t literal[4];
 	unsigned nliteral;
 	unsigned addr;
@@ -1638,9 +1638,9 @@ int r600_bc_build(struct r600_bc *bc)
 	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
 		addr = cf->addr;
 		if (bc->chip_class >= EVERGREEN)
-			r = eg_bc_cf_build(bc, cf);
+			r = eg_bytecode_cf_build(bc, cf);
 		else
-			r = r600_bc_cf_build(bc, cf);
+			r = r600_bytecode_cf_build(bc, cf);
 		if (r)
 			return r;
 		switch (cf->inst) {
@@ -1651,18 +1651,18 @@ int r600_bc_build(struct r600_bc *bc)
 			nliteral = 0;
 			memset(literal, 0, sizeof(literal));
 			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
-				r = r600_bc_alu_nliterals(bc, alu, literal, &nliteral);
+				r = r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral);
 				if (r)
 					return r;
-				r600_bc_alu_adjust_literals(bc, alu, literal, nliteral);
+				r600_bytecode_alu_adjust_literals(bc, alu, literal, nliteral);
 				switch(bc->chip_class) {
 				case R600:
-					r = r600_bc_alu_build(bc, alu, addr);
+					r = r600_bytecode_alu_build(bc, alu, addr);
 					break;
 				case R700:
 				case EVERGREEN: /* eg alu is same encoding as r700 */
 				case CAYMAN: /* eg alu is same encoding as r700 */
-					r = r700_bc_alu_build(bc, alu, addr);
+					r = r700_bytecode_alu_build(bc, alu, addr);
 					break;
 				default:
 					R600_ERR("unknown chip class %d.\n", bc->chip_class);
@@ -1683,7 +1683,7 @@ int r600_bc_build(struct r600_bc *bc)
 		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
 		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
 			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
-				r = r600_bc_vtx_build(bc, vtx, addr);
+				r = r600_bytecode_vtx_build(bc, vtx, addr);
 				if (r)
 					return r;
 				addr += 4;
@@ -1692,14 +1692,14 @@ int r600_bc_build(struct r600_bc *bc)
 		case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
 			if (bc->chip_class == CAYMAN) {
 				LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
-					r = r600_bc_vtx_build(bc, vtx, addr);
+					r = r600_bytecode_vtx_build(bc, vtx, addr);
 					if (r)
 						return r;
 					addr += 4;
 				}
 			}
 			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
-				r = r600_bc_tex_build(bc, tex, addr);
+				r = r600_bytecode_tex_build(bc, tex, addr);
 				if (r)
 					return r;
 				addr += 4;
@@ -1728,17 +1728,17 @@ int r600_bc_build(struct r600_bc *bc)
 	return 0;
 }
 
-void r600_bc_clear(struct r600_bc *bc)
+void r600_bytecode_clear(struct r600_bytecode *bc)
 {
-	struct r600_bc_cf *cf = NULL, *next_cf;
+	struct r600_bytecode_cf *cf = NULL, *next_cf;
 
 	free(bc->bytecode);
 	bc->bytecode = NULL;
 
 	LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) {
-		struct r600_bc_alu *alu = NULL, *next_alu;
-		struct r600_bc_tex *tex = NULL, *next_tex;
-		struct r600_bc_tex *vtx = NULL, *next_vtx;
+		struct r600_bytecode_alu *alu = NULL, *next_alu;
+		struct r600_bytecode_tex *tex = NULL, *next_tex;
+		struct r600_bytecode_tex *vtx = NULL, *next_vtx;
 
 		LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) {
 			free(alu);
@@ -1764,12 +1764,12 @@ void r600_bc_clear(struct r600_bc *bc)
 	LIST_INITHEAD(&cf->list);
 }
 
-void r600_bc_dump(struct r600_bc *bc)
+void r600_bytecode_dump(struct r600_bytecode *bc)
 {
-	struct r600_bc_cf *cf = NULL;
-	struct r600_bc_alu *alu = NULL;
-	struct r600_bc_vtx *vtx = NULL;
-	struct r600_bc_tex *tex = NULL;
+	struct r600_bytecode_cf *cf = NULL;
+	struct r600_bytecode_alu *alu = NULL;
+	struct r600_bytecode_vtx *vtx = NULL;
+	struct r600_bytecode_tex *tex = NULL;
 
 	unsigned i, id;
 	uint32_t literal[4];
@@ -1868,7 +1868,7 @@ void r600_bc_dump(struct r600_bc *bc)
 		id = cf->addr;
 		nliteral = 0;
 		LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
-			r600_bc_alu_nliterals(bc, alu, literal, &nliteral);
+			r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral);
 
 			fprintf(stderr, "%04d %08X   ", id, bc->bytecode[id]);
 			fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel);
@@ -2122,8 +2122,8 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
 {
 	static int dump_shaders = -1;
 
-	struct r600_bc bc;
-	struct r600_bc_vtx vtx;
+	struct r600_bytecode bc;
+	struct r600_bytecode_vtx vtx;
 	struct pipe_vertex_element *elements = ve->elements;
 	const struct util_format_description *desc;
 	unsigned fetch_resource_start = rctx->chip_class >= EVERGREEN ? 0 : 160;
@@ -2144,11 +2144,11 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
 	}
 
 	memset(&bc, 0, sizeof(bc));
-	r600_bc_init(&bc, rctx->chip_class);
+	r600_bytecode_init(&bc, rctx->chip_class);
 
 	for (i = 0; i < ve->count; i++) {
 		if (elements[i].instance_divisor > 1) {
-			struct r600_bc_alu alu;
+			struct r600_bytecode_alu alu;
 
 			memset(&alu, 0, sizeof(alu));
 			alu.inst = BC_INST(&bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT);
@@ -2163,8 +2163,8 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
 			alu.dst.write = 1;
 			alu.last = 1;
 
-			if ((r = r600_bc_add_alu(&bc, &alu))) {
-				r600_bc_clear(&bc);
+			if ((r = r600_bytecode_add_alu(&bc, &alu))) {
+				r600_bytecode_clear(&bc);
 				return r;
 			}
 		}
@@ -2175,7 +2175,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
 		r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp, &endian);
 		desc = util_format_description(ve->elements[i].src_format);
 		if (desc == NULL) {
-			r600_bc_clear(&bc);
+			r600_bytecode_clear(&bc);
 			R600_ERR("unknown format %d\n", ve->elements[i].src_format);
 			return -EINVAL;
 		}
@@ -2200,16 +2200,16 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
 		vtx.offset = elements[i].src_offset;
 		vtx.endian = endian;
 
-		if ((r = r600_bc_add_vtx(&bc, &vtx))) {
-			r600_bc_clear(&bc);
+		if ((r = r600_bytecode_add_vtx(&bc, &vtx))) {
+			r600_bytecode_clear(&bc);
 			return r;
 		}
 	}
 
-	r600_bc_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
+	r600_bytecode_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
 
-	if ((r = r600_bc_build(&bc))) {
-		r600_bc_clear(&bc);
+	if ((r = r600_bytecode_build(&bc))) {
+		r600_bytecode_clear(&bc);
 		return r;
 	}
 
@@ -2218,7 +2218,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
 
 	if (dump_shaders) {
 		fprintf(stderr, "--------------------------------------------------------------\n");
-		r600_bc_dump(&bc);
+		r600_bytecode_dump(&bc);
 		fprintf(stderr, "______________________________________________________________\n");
 	}
 
@@ -2227,13 +2227,13 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
 	/* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
 	ve->fetch_shader = r600_bo(rctx->radeon, ve->fs_size, 256, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE);
 	if (ve->fetch_shader == NULL) {
-		r600_bc_clear(&bc);
+		r600_bytecode_clear(&bc);
 		return -ENOMEM;
 	}
 
 	bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, rctx->ctx.cs, PIPE_TRANSFER_WRITE);
 	if (bytecode == NULL) {
-		r600_bc_clear(&bc);
+		r600_bytecode_clear(&bc);
 		r600_bo_reference(&ve->fetch_shader, NULL);
 		return -ENOMEM;
 	}
@@ -2247,7 +2247,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
 	}
 
 	r600_bo_unmap(rctx->radeon, ve->fetch_shader);
-	r600_bc_clear(&bc);
+	r600_bytecode_clear(&bc);
 
 	if (rctx->chip_class >= EVERGREEN)
 		evergreen_fetch_shader(&rctx->context, ve);
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 5dec95acf1d..61caa4b915e 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -26,7 +26,7 @@
 struct r600_vertex_element;
 struct r600_pipe_context;
 
-struct r600_bc_alu_src {
+struct r600_bytecode_alu_src {
 	unsigned			sel;
 	unsigned			chan;
 	unsigned			neg;
@@ -35,7 +35,7 @@ struct r600_bc_alu_src {
 	uint32_t			value;
 };
 
-struct r600_bc_alu_dst {
+struct r600_bytecode_alu_dst {
 	unsigned			sel;
 	unsigned			chan;
 	unsigned			clamp;
@@ -43,10 +43,10 @@ struct r600_bc_alu_dst {
 	unsigned			rel;
 };
 
-struct r600_bc_alu {
+struct r600_bytecode_alu {
 	struct list_head		list;
-	struct r600_bc_alu_src		src[3];
-	struct r600_bc_alu_dst		dst;
+	struct r600_bytecode_alu_src		src[3];
+	struct r600_bytecode_alu_dst		dst;
 	unsigned			inst;
 	unsigned			last;
 	unsigned			is_op3;
@@ -56,7 +56,7 @@ struct r600_bc_alu {
 	unsigned			omod;
 };
 
-struct r600_bc_tex {
+struct r600_bytecode_tex {
 	struct list_head		list;
 	unsigned			inst;
 	unsigned			resource_id;
@@ -83,7 +83,7 @@ struct r600_bc_tex {
 	unsigned			src_sel_w;
 };
 
-struct r600_bc_vtx {
+struct r600_bytecode_vtx {
 	struct list_head		list;
 	unsigned			inst;
 	unsigned			fetch_type;
@@ -105,7 +105,7 @@ struct r600_bc_vtx {
 	unsigned			endian;
 };
 
-struct r600_bc_output {
+struct r600_bytecode_output {
 	unsigned			array_base;
 	unsigned			type;
 	unsigned			end_of_program;
@@ -120,13 +120,13 @@ struct r600_bc_output {
 	unsigned			barrier;
 };
 
-struct r600_bc_kcache {
+struct r600_bytecode_kcache {
 	unsigned			bank;
 	unsigned			mode;
 	unsigned			addr;
 };
 
-struct r600_bc_cf {
+struct r600_bytecode_cf {
 	struct list_head		list;
 	unsigned			inst;
 	unsigned			addr;
@@ -135,15 +135,15 @@ struct r600_bc_cf {
 	unsigned			cond;
 	unsigned			pop_count;
 	unsigned			cf_addr; /* control flow addr */
-	struct r600_bc_kcache		kcache[2];
+	struct r600_bytecode_kcache		kcache[2];
 	unsigned			r6xx_uses_waterfall;
 	struct list_head		alu;
 	struct list_head		tex;
 	struct list_head		vtx;
-	struct r600_bc_output		output;
-	struct r600_bc_alu		*curr_bs_head;
-	struct r600_bc_alu		*prev_bs_head;
-	struct r600_bc_alu		*prev2_bs_head;
+	struct r600_bytecode_output		output;
+	struct r600_bytecode_alu		*curr_bs_head;
+	struct r600_bytecode_alu		*prev_bs_head;
+	struct r600_bytecode_alu		*prev2_bs_head;
 };
 
 #define FC_NONE				0
@@ -155,8 +155,8 @@ struct r600_bc_cf {
 
 struct r600_cf_stack_entry {
 	int				type;
-	struct r600_bc_cf		*start;
-	struct r600_bc_cf		**mid; /* used to store the else point */
+	struct r600_bytecode_cf		*start;
+	struct r600_bytecode_cf		**mid; /* used to store the else point */
 	int				num_mid;
 };
 
@@ -168,11 +168,11 @@ struct r600_cf_callstack {
 	int				max;
 };
 
-struct r600_bc {
+struct r600_bytecode {
 	enum chip_class			chip_class;
 	int				type;
 	struct list_head		cf;
-	struct r600_bc_cf		*cf_last;
+	struct r600_bytecode_cf		*cf_last;
 	unsigned			ndw;
 	unsigned			ncf;
 	unsigned			ngpr;
@@ -187,27 +187,27 @@ struct r600_bc {
 };
 
 /* eg_asm.c */
-int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf);
+int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf);
 
 /* r600_asm.c */
-void r600_bc_init(struct r600_bc *bc, enum chip_class chip_class);
-void r600_bc_clear(struct r600_bc *bc);
-int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu);
-int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx);
-int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex);
-int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output);
-int r600_bc_build(struct r600_bc *bc);
-int r600_bc_add_cfinst(struct r600_bc *bc, int inst);
-int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type);
-void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg);
-void r600_bc_dump(struct r600_bc *bc);
+void r600_bytecode_init(struct r600_bytecode *bc, enum chip_class chip_class);
+void r600_bytecode_clear(struct r600_bytecode *bc);
+int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu);
+int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx);
+int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex);
+int r600_bytecode_add_output(struct r600_bytecode *bc, const struct r600_bytecode_output *output);
+int r600_bytecode_build(struct r600_bytecode *bc);
+int r600_bytecode_add_cfinst(struct r600_bytecode *bc, int inst);
+int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, int type);
+void r600_bytecode_special_constants(u32 value, unsigned *sel, unsigned *neg);
+void r600_bytecode_dump(struct r600_bytecode *bc);
 
-int cm_bc_add_cf_end(struct r600_bc *bc);
+int cm_bytecode_add_cf_end(struct r600_bytecode *bc);
 
 int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve);
 
 /* r700_asm.c */
-void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf);
-int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id);
+void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf);
+int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id);
 
 #endif
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index f86804eadcf..406e87bdb00 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -140,13 +140,13 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s
 		R600_ERR("translation from TGSI failed !\n");
 		return r;
 	}
-	r = r600_bc_build(&shader->shader.bc);
+	r = r600_bytecode_build(&shader->shader.bc);
 	if (r) {
 		R600_ERR("building bytecode failed !\n");
 		return r;
 	}
 	if (dump_shaders) {
-		r600_bc_dump(&shader->shader.bc);
+		r600_bytecode_dump(&shader->shader.bc);
 		fprintf(stderr, "______________________________________________________________\n");
 	}
 	return r600_pipe_shader(ctx, shader);
@@ -155,7 +155,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s
 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
 {
 	r600_bo_reference(&shader->bo, NULL);
-	r600_bc_clear(&shader->shader.bc);
+	r600_bytecode_clear(&shader->shader.bc);
 
 	memset(&shader->shader,0,sizeof(struct r600_shader));
 }
@@ -183,7 +183,7 @@ struct r600_shader_ctx {
 	unsigned				temp_reg;
 	unsigned				ar_reg;
 	struct r600_shader_tgsi_instruction	*inst_info;
-	struct r600_bc				*bc;
+	struct r600_bytecode				*bc;
 	struct r600_shader			*shader;
 	struct r600_shader_src			src[4];
 	u32					*literals;
@@ -244,7 +244,7 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx)
 static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
 {
 	int i, r;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int gpr = 0, base_chan = 0;
 	int ij_index = 0;
 
@@ -270,7 +270,7 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
 	base_chan = (2 * (ij_index % 2)) + 1;
 
 	for (i = 0; i < 8; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 		if (i < 4)
 			alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW;
@@ -292,7 +292,7 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
 		alu.bank_swizzle_force = SQ_ALU_VEC_210;
 		if ((i % 4) == 3)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -345,8 +345,8 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
 
 	case TGSI_FILE_SYSTEM_VALUE:
 		if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
-			struct r600_bc_alu alu;
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			struct r600_bytecode_alu alu;
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
 			alu.src[0].sel = 0;
@@ -357,7 +357,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
 			alu.dst.write = 1;
 			alu.last = 1;
 
-			if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
 				return r;
 			break;
 		}
@@ -440,7 +440,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
 			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
 
 			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
-			r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
+			r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
 			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
 				return;
 		}
@@ -464,12 +464,12 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
 
 static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
 {
-	struct r600_bc_vtx vtx;
+	struct r600_bytecode_vtx vtx;
 	unsigned int ar_reg;
 	int r;
 
 	if (offset) {
-		struct r600_bc_alu alu;
+		struct r600_bytecode_alu alu;
 
 		memset(&alu, 0, sizeof(alu));
 
@@ -483,7 +483,7 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset
 		alu.dst.write = 1;
 		alu.last = 1;
 
-		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
 			return r;
 
 		ar_reg = dst_reg;
@@ -506,7 +506,7 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset
 	vtx.srf_mode_all = 1;		/* SRF_MODE_NO_ZERO */
 	vtx.endian = r600_endian_swap(32);
 
-	if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
+	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
 		return r;
 
 	return 0;
@@ -515,7 +515,7 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset
 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, j, k, nconst, r;
 
 	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
@@ -540,7 +540,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx)
 		} else if (j > 0) {
 			int treg = r600_get_temp(ctx);
 			for (k = 0; k < 4; k++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 				alu.src[0].sel = ctx->src[i].sel;
 				alu.src[0].chan = k;
@@ -550,7 +550,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx)
 				alu.dst.write = 1;
 				if (k == 3)
 					alu.last = 1;
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
@@ -566,7 +566,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx)
 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, j, k, nliteral, r;
 
 	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
@@ -578,7 +578,7 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
 		if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
 			int treg = r600_get_temp(ctx);
 			for (k = 0; k < 4; k++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 				alu.src[0].sel = ctx->src[i].sel;
 				alu.src[0].chan = k;
@@ -588,7 +588,7 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
 				alu.dst.write = 1;
 				if (k == 3)
 					alu.last = 1;
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
@@ -606,14 +606,14 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
 	struct tgsi_full_immediate *immediate;
 	struct tgsi_full_property *property;
 	struct r600_shader_ctx ctx;
-	struct r600_bc_output output[32];
+	struct r600_bytecode_output output[32];
 	unsigned output_done, noutput;
 	unsigned opcode;
 	int i, j, r = 0, pos0;
 
 	ctx.bc = &shader->bc;
 	ctx.shader = shader;
-	r600_bc_init(ctx.bc, rctx->chip_class);
+	r600_bytecode_init(ctx.bc, rctx->chip_class);
 	ctx.tokens = tokens;
 	tgsi_scan_shader(tokens, &ctx.info);
 	tgsi_parse_init(&ctx.parse, tokens);
@@ -653,9 +653,9 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
 	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
 		ctx.file_offset[TGSI_FILE_INPUT] = 1;
 		if (ctx.bc->chip_class >= EVERGREEN) {
-			r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
+			r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
 		} else {
-			r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
+			r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS);
 		}
 	}
 	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
@@ -746,8 +746,8 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
 
 				int j;
 				for (j = 0; j < 4; j++) {
-					struct r600_bc_alu alu;
-					memset(&alu, 0, sizeof(struct r600_bc_alu));
+					struct r600_bytecode_alu alu;
+					memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 					/* MOV_SAT R, R */
 					alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
@@ -761,7 +761,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
 					if (j == 3) {
 						alu.last = 1;
 					}
-					r = r600_bc_add_alu(ctx.bc, &alu);
+					r = r600_bytecode_add_alu(ctx.bc, &alu);
 					if (r)
 						return r;
 				}
@@ -772,7 +772,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
 	/* export output */
 	j = 0;
 	for (i = 0, pos0 = 0; i < noutput; i++) {
-		memset(&output[i], 0, sizeof(struct r600_bc_output));
+		memset(&output[i], 0, sizeof(struct r600_bytecode_output));
 		output[i + j].gpr = shader->output[i].gpr;
 		output[i + j].elem_size = 3;
 		output[i + j].swizzle_x = 0;
@@ -805,7 +805,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
 				output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
 				if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
 					for (j = 1; j < shader->nr_cbufs; j++) {
-						memset(&output[i + j], 0, sizeof(struct r600_bc_output));
+						memset(&output[i + j], 0, sizeof(struct r600_bytecode_output));
 						output[i + j].gpr = shader->output[i].gpr;
 						output[i + j].elem_size = 3;
 						output[i + j].swizzle_x = 0;
@@ -854,7 +854,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
 			}
 		}
 		if (!pos0) {
-			memset(&output[i], 0, sizeof(struct r600_bc_output));
+			memset(&output[i], 0, sizeof(struct r600_bytecode_output));
 			output[i].gpr = 0;
 			output[i].elem_size = 3;
 			output[i].swizzle_x = 0;
@@ -871,7 +871,7 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
 	}
 	/* add fake pixel export */
 	if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) {
-		memset(&output[0], 0, sizeof(struct r600_bc_output));
+		memset(&output[0], 0, sizeof(struct r600_bytecode_output));
 		output[0].gpr = 0;
 		output[0].elem_size = 3;
 		output[0].swizzle_x = 7;
@@ -899,13 +899,13 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
 	}
 	/* add output to bytecode */
 	for (i = 0; i < noutput; i++) {
-		r = r600_bc_add_output(ctx.bc, &output[i]);
+		r = r600_bytecode_add_output(ctx.bc, &output[i]);
 		if (r)
 			goto out_err;
 	}
 	/* add program end */
 	if (ctx.bc->chip_class == CAYMAN)
-		cm_bc_add_cf_end(ctx.bc);
+		cm_bytecode_add_cf_end(ctx.bc);
 
 	free(ctx.literals);
 	tgsi_parse_free(&ctx.parse);
@@ -928,7 +928,7 @@ static int tgsi_end(struct r600_shader_ctx *ctx)
 	return 0;
 }
 
-static void r600_bc_src(struct r600_bc_alu_src *bc_src,
+static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
 			const struct r600_shader_src *shader_src,
 			unsigned chan)
 {
@@ -940,13 +940,13 @@ static void r600_bc_src(struct r600_bc_alu_src *bc_src,
 	bc_src->value = shader_src->value[bc_src->chan];
 }
 
-static void r600_bc_src_set_abs(struct r600_bc_alu_src *bc_src)
+static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
 {
 	bc_src->abs = 1;
 	bc_src->neg = 0;
 }
 
-static void r600_bc_src_toggle_neg(struct r600_bc_alu_src *bc_src)
+static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
 {
 	bc_src->neg = !bc_src->neg;
 }
@@ -954,7 +954,7 @@ static void r600_bc_src_toggle_neg(struct r600_bc_alu_src *bc_src)
 static void tgsi_dst(struct r600_shader_ctx *ctx,
 		     const struct tgsi_full_dst_register *tgsi_dst,
 		     unsigned swizzle,
-		     struct r600_bc_alu_dst *r600_dst)
+		     struct r600_bytecode_alu_dst *r600_dst)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
 
@@ -984,7 +984,7 @@ static int tgsi_last_instruction(unsigned writemask)
 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, j, r;
 	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
 
@@ -992,25 +992,25 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
 			continue;
 
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
 
 		alu.inst = ctx->inst_info->r600_opcode;
 		if (!swap) {
 			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
-				r600_bc_src(&alu.src[j], &ctx->src[j], i);
+				r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
 			}
 		} else {
-			r600_bc_src(&alu.src[0], &ctx->src[1], i);
-			r600_bc_src(&alu.src[1], &ctx->src[0], i);
+			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
+			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
 		}
 		/* handle some special cases */
 		switch (ctx->inst_info->tgsi_opcode) {
 		case TGSI_OPCODE_SUB:
-			r600_bc_src_toggle_neg(&alu.src[1]);
+			r600_bytecode_src_toggle_neg(&alu.src[1]);
 			break;
 		case TGSI_OPCODE_ABS:
-			r600_bc_src_set_abs(&alu.src[0]);
+			r600_bytecode_src_set_abs(&alu.src[0]);
 			break;
 		default:
 			break;
@@ -1018,7 +1018,7 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
 		if (i == lasti) {
 			alu.last = 1;
 		}
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1039,21 +1039,21 @@ static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
 	int i, j, r;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
 	
 	for (i = 0 ; i < last_slot; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = ctx->inst_info->r600_opcode;
 		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
-			r600_bc_src(&alu.src[j], &ctx->src[j], 0);
+			r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
 		}
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
 		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
 
 		if (i == last_slot - 1)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1072,9 +1072,9 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
 	static float neg_pi = -3.1415926535;
 
 	int r;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
 	alu.is_op3 = 1;
 
@@ -1082,7 +1082,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
 	alu.dst.sel = ctx->temp_reg;
 	alu.dst.write = 1;
 
-	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 
 	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
 	alu.src[1].chan = 0;
@@ -1090,11 +1090,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
 	alu.src[2].sel = V_SQ_ALU_SRC_0_5;
 	alu.src[2].chan = 0;
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
 
 	alu.dst.chan = 0;
@@ -1104,11 +1104,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
 	alu.src[0].sel = ctx->temp_reg;
 	alu.src[0].chan = 0;
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
 	alu.is_op3 = 1;
 
@@ -1134,7 +1134,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
 	}
 
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 	return 0;
@@ -1143,7 +1143,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
 static int cayman_trig(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
 	int i, r;
 
@@ -1153,7 +1153,7 @@ static int cayman_trig(struct r600_shader_ctx *ctx)
 
 
 	for (i = 0; i < last_slot; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = ctx->inst_info->r600_opcode;
 		alu.dst.chan = i;
 
@@ -1164,7 +1164,7 @@ static int cayman_trig(struct r600_shader_ctx *ctx)
 		alu.src[0].chan = 0;
 		if (i == last_slot - 1)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1174,7 +1174,7 @@ static int cayman_trig(struct r600_shader_ctx *ctx)
 static int tgsi_trig(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, r;
 	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
 
@@ -1182,7 +1182,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
 	if (r)
 		return r;
 
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = ctx->inst_info->r600_opcode;
 	alu.dst.chan = 0;
 	alu.dst.sel = ctx->temp_reg;
@@ -1191,7 +1191,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
 	alu.src[0].sel = ctx->temp_reg;
 	alu.src[0].chan = 0;
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 
@@ -1200,14 +1200,14 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
 			continue;
 
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 
 		alu.src[0].sel = ctx->temp_reg;
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
 		if (i == lasti)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1217,7 +1217,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
 static int tgsi_scs(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, r;
 
 	/* We'll only need the trig stuff if we are going to write to the
@@ -1233,7 +1233,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
 	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
 		if (ctx->bc->chip_class == CAYMAN) {
 			for (i = 0 ; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
 				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
 
@@ -1245,19 +1245,19 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
 				alu.src[0].chan = 0;
 				if (i == 2)
 					alu.last = 1;
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
 		} else {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
 			tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
 
 			alu.src[0].sel = ctx->temp_reg;
 			alu.src[0].chan = 0;
 			alu.last = 1;
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
@@ -1267,7 +1267,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
 	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
 		if (ctx->bc->chip_class == CAYMAN) {
 			for (i = 0 ; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
 				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
 				if (i == 1)
@@ -1278,19 +1278,19 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
 				alu.src[0].chan = 0;
 				if (i == 2)
 					alu.last = 1;
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
 		} else {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
 			tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
 
 			alu.src[0].sel = ctx->temp_reg;
 			alu.src[0].chan = 0;
 			alu.last = 1;
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
@@ -1298,7 +1298,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
 
 	/* dst.z = 0.0; */
 	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 
@@ -1309,14 +1309,14 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
 
 		alu.last = 1;
 
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
 
 	/* dst.w = 1.0; */
 	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 
@@ -1327,7 +1327,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
 
 		alu.last = 1;
 
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1337,11 +1337,11 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
 
 static int tgsi_kill(struct r600_shader_ctx *ctx)
 {
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, r;
 
 	for (i = 0; i < 4; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = ctx->inst_info->r600_opcode;
 
 		alu.dst.chan = i;
@@ -1352,12 +1352,12 @@ static int tgsi_kill(struct r600_shader_ctx *ctx)
 			alu.src[1].sel = V_SQ_ALU_SRC_1;
 			alu.src[1].neg = 1;
 		} else {
-			r600_bc_src(&alu.src[1], &ctx->src[0], i);
+			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
 		}
 		if (i == 3) {
 			alu.last = 1;
 		}
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1371,13 +1371,13 @@ static int tgsi_kill(struct r600_shader_ctx *ctx)
 static int tgsi_lit(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int r;
 
 	/* tmp.x = max(src.y, 0.0) */
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
-	r600_bc_src(&alu.src[0], &ctx->src[0], 1);
+	r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
 	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
 	alu.src[1].chan = 1;
 
@@ -1386,7 +1386,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
 	alu.dst.write = 1;
 
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 
@@ -1399,7 +1399,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
 		if (ctx->bc->chip_class == CAYMAN) {
 			for (i = 0; i < 3; i++) {
 				/* tmp.z = log(tmp.x) */
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
 				alu.src[0].sel = ctx->temp_reg;
 				alu.src[0].chan = 0;
@@ -1411,13 +1411,13 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
 				} else
 					alu.dst.write = 0;
 				
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
 		} else {
 			/* tmp.z = log(tmp.x) */
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
 			alu.src[0].sel = ctx->temp_reg;
 			alu.src[0].chan = 0;
@@ -1425,7 +1425,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
 			alu.dst.chan = 2;
 			alu.dst.write = 1;
 			alu.last = 1;
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
@@ -1434,25 +1434,25 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
 		sel = alu.dst.sel;
 
 		/* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
 		alu.src[0].sel  = sel;
 		alu.src[0].chan = chan;
-		r600_bc_src(&alu.src[1], &ctx->src[0], 3);
-		r600_bc_src(&alu.src[2], &ctx->src[0], 0);
+		r600_bytecode_src(&alu.src[1], &ctx->src[0], 3);
+		r600_bytecode_src(&alu.src[2], &ctx->src[0], 0);
 		alu.dst.sel = ctx->temp_reg;
 		alu.dst.chan = 0;
 		alu.dst.write = 1;
 		alu.is_op3 = 1;
 		alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 
 		if (ctx->bc->chip_class == CAYMAN) {
 			for (i = 0; i < 3; i++) {
 				/* dst.z = exp(tmp.x) */
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
 				alu.src[0].sel = ctx->temp_reg;
 				alu.src[0].chan = 0;
@@ -1462,56 +1462,56 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
 					alu.last = 1;
 				} else
 					alu.dst.write = 0;
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
 		} else {
 			/* dst.z = exp(tmp.x) */
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
 			alu.src[0].sel = ctx->temp_reg;
 			alu.src[0].chan = 0;
 			tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
 			alu.last = 1;
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
 	}
 
 	/* dst.x, <- 1.0  */
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 	alu.src[0].sel  = V_SQ_ALU_SRC_1; /*1.0*/
 	alu.src[0].chan = 0;
 	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
 	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 
 	/* dst.y = max(src.x, 0.0) */
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
-	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 	alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
 	alu.src[1].chan = 0;
 	tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
 	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 
 	/* dst.w, <- 1.0  */
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 	alu.src[0].sel  = V_SQ_ALU_SRC_1;
 	alu.src[0].chan = 0;
 	tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
 	alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 
@@ -1521,10 +1521,10 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
 static int tgsi_rsq(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, r;
 
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 	/* FIXME:
 	 * For state trackers other than OpenGL, we'll want to use
@@ -1533,13 +1533,13 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx)
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
 
 	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
-		r600_bc_src(&alu.src[i], &ctx->src[i], 0);
-		r600_bc_src_set_abs(&alu.src[i]);
+		r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
+		r600_bytecode_src_set_abs(&alu.src[i]);
 	}
 	alu.dst.sel = ctx->temp_reg;
 	alu.dst.write = 1;
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 	/* replicate result */
@@ -1549,11 +1549,11 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx)
 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, r;
 
 	for (i = 0; i < 4; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.src[0].sel = ctx->temp_reg;
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 		alu.dst.chan = i;
@@ -1561,7 +1561,7 @@ static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
 		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
 		if (i == 3)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1571,18 +1571,18 @@ static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, r;
 
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = ctx->inst_info->r600_opcode;
 	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
-		r600_bc_src(&alu.src[i], &ctx->src[i], 0);
+		r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
 	}
 	alu.dst.sel = ctx->temp_reg;
 	alu.dst.write = 1;
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 	/* replicate result */
@@ -1593,38 +1593,38 @@ static int cayman_pow(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
 	int i, r;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
 
 	for (i = 0; i < 3; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
-		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 		alu.dst.sel = ctx->temp_reg;
 		alu.dst.chan = i;
 		alu.dst.write = 1;
 		if (i == 2)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
 
 	/* b * LOG2(a) */
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
-	r600_bc_src(&alu.src[0], &ctx->src[1], 0);
+	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
 	alu.src[1].sel = ctx->temp_reg;
 	alu.dst.sel = ctx->temp_reg;
 	alu.dst.write = 1;
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 
 	for (i = 0; i < last_slot; i++) {
 		/* POW(a,b) = EXP2(b * LOG2(a))*/
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
 		alu.src[0].sel = ctx->temp_reg;
 
@@ -1632,7 +1632,7 @@ static int cayman_pow(struct r600_shader_ctx *ctx)
 		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
 		if (i == last_slot - 1)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1641,38 +1641,38 @@ static int cayman_pow(struct r600_shader_ctx *ctx)
 
 static int tgsi_pow(struct r600_shader_ctx *ctx)
 {
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int r;
 
 	/* LOG2(a) */
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
-	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 	alu.dst.sel = ctx->temp_reg;
 	alu.dst.write = 1;
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 	/* b * LOG2(a) */
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
-	r600_bc_src(&alu.src[0], &ctx->src[1], 0);
+	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
 	alu.src[1].sel = ctx->temp_reg;
 	alu.dst.sel = ctx->temp_reg;
 	alu.dst.write = 1;
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 	/* POW(a,b) = EXP2(b * LOG2(a))*/
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
 	alu.src[0].sel = ctx->temp_reg;
 	alu.dst.sel = ctx->temp_reg;
 	alu.dst.write = 1;
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 	return tgsi_helper_tempx_replicate(ctx);
@@ -1681,32 +1681,32 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
 static int tgsi_ssg(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, r;
 
 	/* tmp = (src > 0 ? 1 : src) */
 	for (i = 0; i < 4; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
 		alu.is_op3 = 1;
 
 		alu.dst.sel = ctx->temp_reg;
 		alu.dst.chan = i;
 
-		r600_bc_src(&alu.src[0], &ctx->src[0], i);
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
 		alu.src[1].sel = V_SQ_ALU_SRC_1;
-		r600_bc_src(&alu.src[2], &ctx->src[0], i);
+		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
 
 		if (i == 3)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
 
 	/* dst = (-tmp > 0 ? -1 : tmp) */
 	for (i = 0; i < 4; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
 		alu.is_op3 = 1;
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
@@ -1723,7 +1723,7 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx)
 
 		if (i == 3)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1732,11 +1732,11 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx)
 
 static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
 {
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, r;
 
 	for (i = 0; i < 4; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP);
 			alu.dst.chan = i;
@@ -1749,7 +1749,7 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru
 		if (i == 3) {
 			alu.last = 1;
 		}
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1759,7 +1759,7 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru
 static int tgsi_op3(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, j, r;
 	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
 
@@ -1767,10 +1767,10 @@ static int tgsi_op3(struct r600_shader_ctx *ctx)
 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
 			continue;
 
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = ctx->inst_info->r600_opcode;
 		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
-			r600_bc_src(&alu.src[j], &ctx->src[j], i);
+			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
 		}
 
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
@@ -1780,7 +1780,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx)
 		if (i == lasti) {
 			alu.last = 1;
 		}
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1790,14 +1790,14 @@ static int tgsi_op3(struct r600_shader_ctx *ctx)
 static int tgsi_dp(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, j, r;
 
 	for (i = 0; i < 4; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = ctx->inst_info->r600_opcode;
 		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
-			r600_bc_src(&alu.src[j], &ctx->src[j], i);
+			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
 		}
 
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
@@ -1830,7 +1830,7 @@ static int tgsi_dp(struct r600_shader_ctx *ctx)
 		if (i == 3) {
 			alu.last = 1;
 		}
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -1857,8 +1857,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 {
 	static float one_point_five = 1.5f;
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_tex tex;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_tex tex;
+	struct r600_bytecode_alu alu;
 	unsigned src_gpr;
 	int r, i, j;
 	int opcode;
@@ -1876,7 +1876,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 
 		for (i = 1; i < 3; i++) {
 			/* set gradients h/v */
-			memset(&tex, 0, sizeof(struct r600_bc_tex));
+			memset(&tex, 0, sizeof(struct r600_bytecode_tex));
 			tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H :
 				SQ_TEX_INST_SET_GRADIENTS_V;
 			tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
@@ -1890,15 +1890,15 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 				tex.src_sel_w = 3;
 
 				for (j = 0; j < 4; j++) {
-					memset(&alu, 0, sizeof(struct r600_bc_alu));
+					memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 					alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
-                                        r600_bc_src(&alu.src[0], &ctx->src[i], j);
+                                        r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
                                         alu.dst.sel = tex.src_gpr;
                                         alu.dst.chan = j;
                                         if (j == 3)
                                                 alu.last = 1;
                                         alu.dst.write = 1;
-                                        r = r600_bc_add_alu(ctx->bc, &alu);
+                                        r = r600_bytecode_add_alu(ctx->bc, &alu);
                                         if (r)
                                                 return r;
 				}
@@ -1919,7 +1919,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 				tex.coord_type_z = 1;
 				tex.coord_type_w = 1;
 			}
-			r = r600_bc_add_tex(ctx->bc, &tex);
+			r = r600_bytecode_add_tex(ctx->bc, &tex);
 			if (r)
 				return r;
 		}
@@ -1929,9 +1929,9 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 		if (ctx->bc->chip_class == CAYMAN) {
 			out_chan = 2;
 			for (i = 0; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
-				r600_bc_src(&alu.src[0], &ctx->src[0], 3);
+				r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
 
 				alu.dst.sel = ctx->temp_reg;
 				alu.dst.chan = i;
@@ -1939,40 +1939,40 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 					alu.last = 1;
 				if (out_chan == i)
 					alu.dst.write = 1;
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
 
 		} else {
 			out_chan = 3;
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
-			r600_bc_src(&alu.src[0], &ctx->src[0], 3);
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], 3);
 
 			alu.dst.sel = ctx->temp_reg;
 			alu.dst.chan = out_chan;
 			alu.last = 1;
 			alu.dst.write = 1;
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
 
 		for (i = 0; i < 3; i++) {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
 			alu.src[0].sel = ctx->temp_reg;
 			alu.src[0].chan = out_chan;
-			r600_bc_src(&alu.src[1], &ctx->src[0], i);
+			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
 			alu.dst.sel = ctx->temp_reg;
 			alu.dst.chan = i;
 			alu.dst.write = 1;
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 		alu.src[0].sel = V_SQ_ALU_SRC_1;
 		alu.src[0].chan = 0;
@@ -1980,7 +1980,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 		alu.dst.chan = 3;
 		alu.last = 1;
 		alu.dst.write = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 		src_loaded = TRUE;
@@ -1993,16 +1993,16 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 
 		/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
 		for (i = 0; i < 4; i++) {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
-			r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
-			r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
+			r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
 			alu.dst.sel = ctx->temp_reg;
 			alu.dst.chan = i;
 			if (i == 3)
 				alu.last = 1;
 			alu.dst.write = 1;
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
@@ -2010,7 +2010,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 		/* tmp1.z = RCP_e(|tmp1.z|) */
 		if (ctx->bc->chip_class == CAYMAN) {
 			for (i = 0; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
 				alu.src[0].sel = ctx->temp_reg;
 				alu.src[0].chan = 2;
@@ -2021,12 +2021,12 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 					alu.dst.write = 1;
 				if (i == 2)
 					alu.last = 1;
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
 		} else {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
 			alu.src[0].sel = ctx->temp_reg;
 			alu.src[0].chan = 2;
@@ -2035,7 +2035,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 			alu.dst.chan = 2;
 			alu.dst.write = 1;
 			alu.last = 1;
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
@@ -2044,7 +2044,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 		 * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
 		 * muladd has no writemask, have to use another temp
 		 */
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
 		alu.is_op3 = 1;
 
@@ -2061,11 +2061,11 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 		alu.dst.chan = 0;
 		alu.dst.write = 1;
 
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
 		alu.is_op3 = 1;
 
@@ -2083,7 +2083,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 		alu.dst.write = 1;
 
 		alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 
@@ -2093,15 +2093,15 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 
 	if (src_requires_loading && !src_loaded) {
 		for (i = 0; i < 4; i++) {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
-			r600_bc_src(&alu.src[0], &ctx->src[0], i);
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
 			alu.dst.sel = ctx->temp_reg;
 			alu.dst.chan = i;
 			if (i == 3)
 				alu.last = 1;
 			alu.dst.write = 1;
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
@@ -2124,7 +2124,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 		}
 	}
 
-	memset(&tex, 0, sizeof(struct r600_bc_tex));
+	memset(&tex, 0, sizeof(struct r600_bytecode_tex));
 	tex.inst = opcode;
 
 	tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
@@ -2171,7 +2171,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 	if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
 		tex.src_sel_w = tex.src_sel_z;
 
-	r = r600_bc_add_tex(ctx->bc, &tex);
+	r = r600_bytecode_add_tex(ctx->bc, &tex);
 	if (r)
 		return r;
 
@@ -2182,7 +2182,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 static int tgsi_lrp(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
 	unsigned i;
 	int r;
@@ -2193,17 +2193,17 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
 			if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
 				continue;
 
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
-			r600_bc_src(&alu.src[0], &ctx->src[1], i);
-			r600_bc_src(&alu.src[1], &ctx->src[2], i);
+			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
+			r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
 			alu.omod = 3;
 			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
 			alu.dst.chan = i;
 			if (i == lasti) {
 				alu.last = 1;
 			}
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
@@ -2215,19 +2215,19 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
 			continue;
 
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
 		alu.src[0].sel = V_SQ_ALU_SRC_1;
 		alu.src[0].chan = 0;
-		r600_bc_src(&alu.src[1], &ctx->src[0], i);
-		r600_bc_src_toggle_neg(&alu.src[1]);
+		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
+		r600_bytecode_src_toggle_neg(&alu.src[1]);
 		alu.dst.sel = ctx->temp_reg;
 		alu.dst.chan = i;
 		if (i == lasti) {
 			alu.last = 1;
 		}
 		alu.dst.write = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -2237,18 +2237,18 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
 			continue;
 
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
 		alu.src[0].sel = ctx->temp_reg;
 		alu.src[0].chan = i;
-		r600_bc_src(&alu.src[1], &ctx->src[2], i);
+		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
 		alu.dst.sel = ctx->temp_reg;
 		alu.dst.chan = i;
 		if (i == lasti) {
 			alu.last = 1;
 		}
 		alu.dst.write = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -2258,11 +2258,11 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
 			continue;
 
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
 		alu.is_op3 = 1;
-		r600_bc_src(&alu.src[0], &ctx->src[0], i);
-		r600_bc_src(&alu.src[1], &ctx->src[1], i);
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+		r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
 		alu.src[2].sel = ctx->temp_reg;
 		alu.src[2].chan = i;
 
@@ -2271,7 +2271,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
 		if (i == lasti) {
 			alu.last = 1;
 		}
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -2281,7 +2281,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
 static int tgsi_cmp(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, r;
 	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
 
@@ -2289,18 +2289,18 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
 			continue;
 
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
-		r600_bc_src(&alu.src[0], &ctx->src[0], i);
-		r600_bc_src(&alu.src[1], &ctx->src[2], i);
-		r600_bc_src(&alu.src[2], &ctx->src[1], i);
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
+		r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
 		alu.dst.chan = i;
 		alu.dst.write = 1;
 		alu.is_op3 = 1;
 		if (i == lasti)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -2312,7 +2312,7 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
 	static const unsigned int src0_swizzle[] = {2, 0, 1};
 	static const unsigned int src1_swizzle[] = {1, 2, 0};
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	uint32_t use_temp = 0;
 	int i, r;
 
@@ -2320,11 +2320,11 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
 		use_temp = 1;
 
 	for (i = 0; i < 4; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
 		if (i < 3) {
-			r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
-			r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
+			r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
 		} else {
 			alu.src[0].sel = V_SQ_ALU_SRC_0;
 			alu.src[0].chan = i;
@@ -2338,18 +2338,18 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
 
 		if (i == 3)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
 
 	for (i = 0; i < 4; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
 
 		if (i < 3) {
-			r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
-			r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
+			r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
 		} else {
 			alu.src[0].sel = V_SQ_ALU_SRC_0;
 			alu.src[0].chan = i;
@@ -2370,7 +2370,7 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
 		alu.is_op3 = 1;
 		if (i == 3)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -2382,22 +2382,22 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
 static int tgsi_exp(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int r;
 	int i;
 
 	/* result.x = 2^floor(src); */
 	if (inst->Dst[0].Register.WriteMask & 1) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
-		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 
 		alu.dst.sel = ctx->temp_reg;
 		alu.dst.chan = 0;
 		alu.dst.write = 1;
 		alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 
@@ -2413,7 +2413,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 					alu.dst.write = 1;
 				if (i == 2)
 					alu.last = 1;
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
@@ -2426,7 +2426,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 			alu.dst.chan = 0;
 			alu.dst.write = 1;
 			alu.last = 1;
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
@@ -2434,10 +2434,10 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 
 	/* result.y = tmp - floor(tmp); */
 	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
-		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 
 		alu.dst.sel = ctx->temp_reg;
 #if 0
@@ -2450,7 +2450,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 
 		alu.last = 1;
 
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -2459,9 +2459,9 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
 		if (ctx->bc->chip_class == CAYMAN) {
 			for (i = 0; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
-				r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 
 				alu.dst.sel = ctx->temp_reg;
 				alu.dst.chan = i;
@@ -2470,14 +2470,14 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 					alu.last = 1;
 				}
 
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
 		} else {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
-			r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 
 			alu.dst.sel = ctx->temp_reg;
 			alu.dst.write = 1;
@@ -2485,7 +2485,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 
 			alu.last = 1;
 
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
@@ -2493,7 +2493,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 
 	/* result.w = 1.0;*/
 	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 		alu.src[0].sel = V_SQ_ALU_SRC_1;
@@ -2503,7 +2503,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 		alu.dst.chan = 3;
 		alu.dst.write = 1;
 		alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -2513,7 +2513,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 static int tgsi_log(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int r;
 	int i;
 
@@ -2521,11 +2521,11 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 	if (inst->Dst[0].Register.WriteMask & 1) {
 		if (ctx->bc->chip_class == CAYMAN) {
 			for (i = 0; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
-				r600_bc_src(&alu.src[0], &ctx->src[0], 0);
-				r600_bc_src_set_abs(&alu.src[0]);
+				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+				r600_bytecode_src_set_abs(&alu.src[0]);
 			
 				alu.dst.sel = ctx->temp_reg;
 				alu.dst.chan = i;
@@ -2533,23 +2533,23 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 					alu.dst.write = 1;
 				if (i == 2)
 					alu.last = 1;
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
 
 		} else {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
-			r600_bc_src(&alu.src[0], &ctx->src[0], 0);
-			r600_bc_src_set_abs(&alu.src[0]);
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+			r600_bytecode_src_set_abs(&alu.src[0]);
 			
 			alu.dst.sel = ctx->temp_reg;
 			alu.dst.chan = 0;
 			alu.dst.write = 1;
 			alu.last = 1;
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
@@ -2563,7 +2563,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 		alu.dst.write = 1;
 		alu.last = 1;
 
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -2573,11 +2573,11 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 
 		if (ctx->bc->chip_class == CAYMAN) {
 			for (i = 0; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
-				r600_bc_src(&alu.src[0], &ctx->src[0], 0);
-				r600_bc_src_set_abs(&alu.src[0]);
+				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+				r600_bytecode_src_set_abs(&alu.src[0]);
 
 				alu.dst.sel = ctx->temp_reg;
 				alu.dst.chan = i;
@@ -2586,28 +2586,28 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 				if (i == 2)
 					alu.last = 1;
 				
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;	
 			}
 		} else {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
-			r600_bc_src(&alu.src[0], &ctx->src[0], 0);
-			r600_bc_src_set_abs(&alu.src[0]);
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+			r600_bytecode_src_set_abs(&alu.src[0]);
 
 			alu.dst.sel = ctx->temp_reg;
 			alu.dst.chan = 1;
 			alu.dst.write = 1;
 			alu.last = 1;
 
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
 
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
 		alu.src[0].sel = ctx->temp_reg;
@@ -2618,13 +2618,13 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 		alu.dst.write = 1;
 		alu.last = 1;
 
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 
 		if (ctx->bc->chip_class == CAYMAN) {
 			for (i = 0; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
 				alu.src[0].sel = ctx->temp_reg;
 				alu.src[0].chan = 1;
@@ -2636,12 +2636,12 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 				if (i == 2)
 					alu.last = 1;
 
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
 		} else {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
 			alu.src[0].sel = ctx->temp_reg;
 			alu.src[0].chan = 1;
@@ -2651,14 +2651,14 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 			alu.dst.write = 1;
 			alu.last = 1;
 
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
 
 		if (ctx->bc->chip_class == CAYMAN) {
 			for (i = 0; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
 				alu.src[0].sel = ctx->temp_reg;
 				alu.src[0].chan = 1;
@@ -2670,12 +2670,12 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 				if (i == 2)
 					alu.last = 1;
 				
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
 		} else {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
 			alu.src[0].sel = ctx->temp_reg;
 			alu.src[0].chan = 1;
@@ -2685,17 +2685,17 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 			alu.dst.write = 1;
 			alu.last = 1;
 
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
 
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
 
-		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
-		r600_bc_src_set_abs(&alu.src[0]);
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+		r600_bytecode_src_set_abs(&alu.src[0]);
 
 		alu.src[1].sel = ctx->temp_reg;
 		alu.src[1].chan = 1;
@@ -2705,7 +2705,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 		alu.dst.write = 1;
 		alu.last = 1;
 
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -2714,11 +2714,11 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
 		if (ctx->bc->chip_class == CAYMAN) {
 			for (i = 0; i < 3; i++) {
-				memset(&alu, 0, sizeof(struct r600_bc_alu));
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
-				r600_bc_src(&alu.src[0], &ctx->src[0], 0);
-				r600_bc_src_set_abs(&alu.src[0]);
+				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+				r600_bytecode_src_set_abs(&alu.src[0]);
 
 				alu.dst.sel = ctx->temp_reg;
 				if (i == 2)
@@ -2727,23 +2727,23 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 				if (i == 2)
 					alu.last = 1;
 
-				r = r600_bc_add_alu(ctx->bc, &alu);
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
 				if (r)
 					return r;
 			}
 		} else {
-			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
-			r600_bc_src(&alu.src[0], &ctx->src[0], 0);
-			r600_bc_src_set_abs(&alu.src[0]);
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+			r600_bytecode_src_set_abs(&alu.src[0]);
 
 			alu.dst.sel = ctx->temp_reg;
 			alu.dst.write = 1;
 			alu.dst.chan = 2;
 			alu.last = 1;
 
-			r = r600_bc_add_alu(ctx->bc, &alu);
+			r = r600_bytecode_add_alu(ctx->bc, &alu);
 			if (r)
 				return r;
 		}
@@ -2751,7 +2751,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 
 	/* result.w = 1.0; */
 	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 		alu.src[0].sel = V_SQ_ALU_SRC_1;
@@ -2762,7 +2762,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 		alu.dst.write = 1;
 		alu.last = 1;
 
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -2773,10 +2773,10 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int r;
 
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 	switch (inst->Instruction.Opcode) {
 	case TGSI_OPCODE_ARL:
@@ -2790,11 +2790,11 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
 		return -1;
 	}
 
-	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 	alu.last = 1;
 	alu.dst.sel = ctx->ar_reg;
 	alu.dst.write = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 
@@ -2804,12 +2804,12 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
 	 * between ARL and AR usage. The easy way to do that is to remove
 	 * the MOVA here, and load it for the first AR access after ar_reg
 	 * has been modified in each clause. */
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
 	alu.src[0].sel = ctx->ar_reg;
 	alu.src[0].chan = 0;
 	alu.last = 1;
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 	return 0;
@@ -2818,19 +2818,19 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
 {
 	/* TODO from r600c, ar values don't persist between clauses */
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int r;
 
 	switch (inst->Instruction.Opcode) {
 	case TGSI_OPCODE_ARL:
 		memset(&alu, 0, sizeof(alu));
 		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
-		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 		alu.dst.sel = ctx->ar_reg;
 		alu.dst.write = 1;
 		alu.last = 1;
 
-		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
 			return r;
 
 		memset(&alu, 0, sizeof(alu));
@@ -2840,18 +2840,18 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
 		alu.dst.write = 1;
 		alu.last = 1;
 
-		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
 			return r;
 		break;
 	case TGSI_OPCODE_ARR:
 		memset(&alu, 0, sizeof(alu));
 		alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
-		r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 		alu.dst.sel = ctx->ar_reg;
 		alu.dst.write = 1;
 		alu.last = 1;
 
-		if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
 			return r;
 		break;
 	default:
@@ -2864,7 +2864,7 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
 	alu.src[0].sel = ctx->ar_reg;
 	alu.last = 1;
 
-	r = r600_bc_add_alu(ctx->bc, &alu);
+	r = r600_bytecode_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
 	ctx->bc->cf_last->r6xx_uses_waterfall = 1;
@@ -2874,11 +2874,11 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
 static int tgsi_opdst(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int i, r = 0;
 
 	for (i = 0; i < 4; i++) {
-		memset(&alu, 0, sizeof(struct r600_bc_alu));
+		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
@@ -2886,17 +2886,17 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx)
 		if (i == 0 || i == 3) {
 			alu.src[0].sel = V_SQ_ALU_SRC_1;
 		} else {
-			r600_bc_src(&alu.src[0], &ctx->src[0], i);
+			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
 		}
 
 		if (i == 0 || i == 2) {
 			alu.src[1].sel = V_SQ_ALU_SRC_1;
 		} else {
-			r600_bc_src(&alu.src[1], &ctx->src[1], i);
+			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
 		}
 		if (i == 3)
 			alu.last = 1;
-		r = r600_bc_add_alu(ctx->bc, &alu);
+		r = r600_bytecode_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
@@ -2905,10 +2905,10 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx)
 
 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
 {
-	struct r600_bc_alu alu;
+	struct r600_bytecode_alu alu;
 	int r;
 
-	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
 	alu.inst = opcode;
 	alu.predicate = 1;
 
@@ -2916,13 +2916,13 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
 	alu.dst.write = 1;
 	alu.dst.chan = 0;
 
-	r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
 	alu.src[1].sel = V_SQ_ALU_SRC_0;
 	alu.src[1].chan = 0;
 
 	alu.last = 1;
 
-	r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
+	r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
 	if (r)
 		return r;
 	return 0;
@@ -2953,7 +2953,7 @@ static int pops(struct r600_shader_ctx *ctx, int pops)
 	}
 
 	if (force_pop) {
-		r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
+		r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
 		ctx->bc->cf_last->pop_count = pops;
 		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
 	}
@@ -3024,8 +3024,8 @@ static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
 {
 	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
 
-	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
-						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
+	sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid,
+						sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1));
 	sp->mid[sp->num_mid] = ctx->bc->cf_last;
 	sp->num_mid++;
 }
@@ -3053,14 +3053,14 @@ static void fc_poplevel(struct r600_shader_ctx *ctx)
 #if 0
 static int emit_return(struct r600_shader_ctx *ctx)
 {
-	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
+	r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
 	return 0;
 }
 
 static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
 {
 
-	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
+	r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
 	ctx->bc->cf_last->pop_count = pops;
 	/* TODO work out offset */
 	return 0;
@@ -3089,7 +3089,7 @@ static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
 {
 	emit_testflag(ctx);
 
-	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
+	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
 	ctx->bc->cf_last->pop_count = 1;
 
 	fc_set_mid(ctx, fc_sp);
@@ -3102,7 +3102,7 @@ static int tgsi_if(struct r600_shader_ctx *ctx)
 {
 	emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE));
 
-	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
+	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
 
 	fc_pushlevel(ctx, FC_IF);
 
@@ -3112,7 +3112,7 @@ static int tgsi_if(struct r600_shader_ctx *ctx)
 
 static int tgsi_else(struct r600_shader_ctx *ctx)
 {
-	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
+	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE));
 	ctx->bc->cf_last->pop_count = 1;
 
 	fc_set_mid(ctx, ctx->bc->fc_sp);
@@ -3142,7 +3142,7 @@ static int tgsi_endif(struct r600_shader_ctx *ctx)
 
 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
 {
-	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
+	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
 
 	fc_pushlevel(ctx, FC_LOOP);
 
@@ -3155,7 +3155,7 @@ static int tgsi_endloop(struct r600_shader_ctx *ctx)
 {
 	int i;
 
-	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
+	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END));
 
 	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
 		R600_ERR("loop/endloop in shader code are not paired.\n");
@@ -3195,7 +3195,7 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
 		return -EINVAL;
 	}
 
-	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
+	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
 	ctx->bc->cf_last->pop_count = 1;
 
 	fc_set_mid(ctx, fscp);
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index 600c3e2f540..ada369ade68 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -37,7 +37,7 @@ struct r600_shader_io {
 
 struct r600_shader {
 	unsigned		processor_type;
-	struct r600_bc		bc;
+	struct r600_bytecode		bc;
 	unsigned		ninput;
 	unsigned		noutput;
 	unsigned		npos;
diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c
index b3c7d1494fc..74efe226530 100644
--- a/src/gallium/drivers/r600/r700_asm.c
+++ b/src/gallium/drivers/r600/r700_asm.c
@@ -26,7 +26,7 @@
 #include "r600_asm.h"
 #include "r700_sq.h"
 
-void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf)
+void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf)
 {
 	unsigned count = (cf->ndw / 4) - 1;
 	*bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
@@ -36,7 +36,7 @@ void r700_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf)
 			S_SQ_CF_WORD1_COUNT_3(count >> 3);
 }
 
-int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
+int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id)
 {
 	bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
 		S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |

From 5e7713caa9d601e59b600218a3b867db0f331deb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Tue, 16 Aug 2011 18:48:11 +0200
Subject: [PATCH 349/600] st/dri: remove an unused-but-set variable

---
 src/gallium/state_trackers/dri/drm/dri2.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c
index 53638da9888..cf476056f41 100644
--- a/src/gallium/state_trackers/dri/drm/dri2.c
+++ b/src/gallium/state_trackers/dri/drm/dri2.c
@@ -46,7 +46,6 @@
 static void
 dri2_flush_drawable(__DRIdrawable *draw)
 {
-   struct dri_drawable *drawable = dri_drawable(draw);
    struct dri_context *ctx = dri_get_current(draw->driScreenPriv);
 
    if (ctx)

From 9e8f556b199a662c5525b9d03f52a067244fa602 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Tue, 16 Aug 2011 19:06:55 +0200
Subject: [PATCH 350/600] softpipe: fix an obvious copy-paste error in
 get_query_result

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/softpipe/sp_query.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c
index 4ae69c1c2bd..88f42572f19 100644
--- a/src/gallium/drivers/softpipe/sp_query.c
+++ b/src/gallium/drivers/softpipe/sp_query.c
@@ -157,7 +157,7 @@ softpipe_get_query_result(struct pipe_context *pipe,
       /*os_get_time is in microseconds*/
       td.frequency = 1000000;
       td.disjoint = FALSE;
-      memcpy(vresult, &sq->so,
+      memcpy(vresult, &td,
              sizeof(struct pipe_query_data_timestamp_disjoint));
    }
       break;

From af501e2b29c7fb161671dc5b3395eee1d1b16d3f Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Fri, 12 Aug 2011 10:20:34 -0700
Subject: [PATCH 351/600] glsl: Fix type error when lowering integer divisions

This patch fixes a bug when lowering an integer division:

  x/y

to a multiplication by a reciprocal:

  int(float(x)*reciprocal(float(y)))

If x was a plain int and y was an ivecN, the lowering pass
incorrectly assigned the type of the product to be float, when in fact
it should be vecN.  This caused mesa to abort with an IR validation
error.

Fixes piglit tests {fs,vs}-op-div-int-ivec{2,3,4}.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/lower_instructions.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index 806f8639959..23aa19bde6f 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -166,6 +166,10 @@ lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
       else
 	 op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL);
 
+      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+					 ir->type->vector_elements,
+					 ir->type->matrix_columns);
+
       op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1);
 
       if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) {

From 11e4ea0010c3a756cfdaf427c14e104c9a11a645 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 16 Aug 2011 13:05:26 -0600
Subject: [PATCH 352/600] mesa: ChooseTextureFormat() returns gl_format, not
 GLuint

---
 src/mesa/main/dd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index e0c5844e193..b5ed9a40c70 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -194,7 +194,7 @@ struct dd_function_table {
     * cases, srcFormat and srcType can be GL_NONE.
     * Called by glTexImage(), etc.
     */
-   GLuint (*ChooseTextureFormat)( struct gl_context *ctx, GLint internalFormat,
+   gl_format (*ChooseTextureFormat)( struct gl_context *ctx, GLint internalFormat,
                                      GLenum srcFormat, GLenum srcType );
 
    /**

From c1f00731fd48dde68b67f157c27eb20982e82193 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 3 May 2011 15:27:38 -0700
Subject: [PATCH 353/600] i965: Generate driver-specific IR for non-fragment
 shaders as well.

This will be used by the new vertex shader backend.  The scalarizing
passes are skipped for non-fragment, since vertex and geometry threads
are based on vec4s.
---
 src/mesa/drivers/dri/i965/brw_shader.cpp | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 9471883fb2b..f4005f80055 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -75,10 +75,15 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 {
    struct brw_context *brw = brw_context(ctx);
    struct intel_context *intel = &brw->intel;
+   unsigned int stage;
+
+   for (stage = 0; stage < ARRAY_SIZE(prog->_LinkedShaders); stage++) {
+      struct brw_shader *shader =
+	 (struct brw_shader *)prog->_LinkedShaders[stage];
+
+      if (!shader)
+	 continue;
 
-   struct brw_shader *shader =
-      (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
-   if (shader != NULL) {
       void *mem_ctx = ralloc_context(NULL);
       bool progress;
 
@@ -116,8 +121,10 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
       do {
 	 progress = false;
 
-	 brw_do_channel_expressions(shader->ir);
-	 brw_do_vector_splitting(shader->ir);
+	 if (stage == MESA_SHADER_FRAGMENT) {
+	    brw_do_channel_expressions(shader->ir);
+	    brw_do_vector_splitting(shader->ir);
+	 }
 
 	 progress = do_lower_jumps(shader->ir, true, true,
 				   true, /* main return */

From 6034b9a5124475d300d0678bd2fb6160865fa972 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 3 May 2011 10:55:50 -0700
Subject: [PATCH 354/600] i965: Create a shared enum for hardware and
 compiler-internal opcodes.

This should make gdbing more pleasant, and it might be used in sharing
part of the codegen between the VS and FS backends.
---
 src/mesa/drivers/dri/i965/brw_defines.h       | 134 +++++++++++-------
 src/mesa/drivers/dri/i965/brw_fs.cpp          |  11 +-
 src/mesa/drivers/dri/i965/brw_fs.h            |  56 +++-----
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp     |   6 +
 .../drivers/dri/i965/brw_fs_reg_allocate.cpp  |  20 +--
 .../dri/i965/brw_fs_schedule_instructions.cpp |  15 --
 src/mesa/drivers/dri/i965/brw_shader.h        |   4 +
 7 files changed, 120 insertions(+), 126 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 0a3027d04ad..fe5d29c4328 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -557,58 +557,88 @@
 #define BRW_WE_ALL		1
 /** @} */
 
-#define BRW_OPCODE_MOV        1
-#define BRW_OPCODE_SEL        2
-#define BRW_OPCODE_NOT        4
-#define BRW_OPCODE_AND        5
-#define BRW_OPCODE_OR         6
-#define BRW_OPCODE_XOR        7
-#define BRW_OPCODE_SHR        8
-#define BRW_OPCODE_SHL        9
-#define BRW_OPCODE_RSR        10
-#define BRW_OPCODE_RSL        11
-#define BRW_OPCODE_ASR        12
-#define BRW_OPCODE_CMP        16
-#define BRW_OPCODE_CMPN       17
-#define BRW_OPCODE_JMPI       32
-#define BRW_OPCODE_IF         34
-#define BRW_OPCODE_IFF        35
-#define BRW_OPCODE_ELSE       36
-#define BRW_OPCODE_ENDIF      37
-#define BRW_OPCODE_DO         38
-#define BRW_OPCODE_WHILE      39
-#define BRW_OPCODE_BREAK      40
-#define BRW_OPCODE_CONTINUE   41
-#define BRW_OPCODE_HALT       42
-#define BRW_OPCODE_MSAVE      44
-#define BRW_OPCODE_MRESTORE   45
-#define BRW_OPCODE_PUSH       46
-#define BRW_OPCODE_POP        47
-#define BRW_OPCODE_WAIT       48
-#define BRW_OPCODE_SEND       49
-#define BRW_OPCODE_SENDC      50
-#define BRW_OPCODE_MATH       56
-#define BRW_OPCODE_ADD        64
-#define BRW_OPCODE_MUL        65
-#define BRW_OPCODE_AVG        66
-#define BRW_OPCODE_FRC        67
-#define BRW_OPCODE_RNDU       68
-#define BRW_OPCODE_RNDD       69
-#define BRW_OPCODE_RNDE       70
-#define BRW_OPCODE_RNDZ       71
-#define BRW_OPCODE_MAC        72
-#define BRW_OPCODE_MACH       73
-#define BRW_OPCODE_LZD        74
-#define BRW_OPCODE_SAD2       80
-#define BRW_OPCODE_SADA2      81
-#define BRW_OPCODE_DP4        84
-#define BRW_OPCODE_DPH        85
-#define BRW_OPCODE_DP3        86
-#define BRW_OPCODE_DP2        87
-#define BRW_OPCODE_DPA2       88
-#define BRW_OPCODE_LINE       89
-#define BRW_OPCODE_PLN        90
-#define BRW_OPCODE_NOP        126
+enum opcode {
+   /* These are the actual hardware opcodes. */
+   BRW_OPCODE_MOV =	1,
+   BRW_OPCODE_SEL =	2,
+   BRW_OPCODE_NOT =	4,
+   BRW_OPCODE_AND =	5,
+   BRW_OPCODE_OR =	6,
+   BRW_OPCODE_XOR =	7,
+   BRW_OPCODE_SHR =	8,
+   BRW_OPCODE_SHL =	9,
+   BRW_OPCODE_RSR =	10,
+   BRW_OPCODE_RSL =	11,
+   BRW_OPCODE_ASR =	12,
+   BRW_OPCODE_CMP =	16,
+   BRW_OPCODE_CMPN =	17,
+   BRW_OPCODE_JMPI =	32,
+   BRW_OPCODE_IF =	34,
+   BRW_OPCODE_IFF =	35,
+   BRW_OPCODE_ELSE =	36,
+   BRW_OPCODE_ENDIF =	37,
+   BRW_OPCODE_DO =	38,
+   BRW_OPCODE_WHILE =	39,
+   BRW_OPCODE_BREAK =	40,
+   BRW_OPCODE_CONTINUE = 41,
+   BRW_OPCODE_HALT =	42,
+   BRW_OPCODE_MSAVE =	44,
+   BRW_OPCODE_MRESTORE = 45,
+   BRW_OPCODE_PUSH =	46,
+   BRW_OPCODE_POP =	47,
+   BRW_OPCODE_WAIT =	48,
+   BRW_OPCODE_SEND =	49,
+   BRW_OPCODE_SENDC =	50,
+   BRW_OPCODE_MATH =	56,
+   BRW_OPCODE_ADD =	64,
+   BRW_OPCODE_MUL =	65,
+   BRW_OPCODE_AVG =	66,
+   BRW_OPCODE_FRC =	67,
+   BRW_OPCODE_RNDU =	68,
+   BRW_OPCODE_RNDD =	69,
+   BRW_OPCODE_RNDE =	70,
+   BRW_OPCODE_RNDZ =	71,
+   BRW_OPCODE_MAC =	72,
+   BRW_OPCODE_MACH =	73,
+   BRW_OPCODE_LZD =	74,
+   BRW_OPCODE_SAD2 =	80,
+   BRW_OPCODE_SADA2 =	81,
+   BRW_OPCODE_DP4 =	84,
+   BRW_OPCODE_DPH =	85,
+   BRW_OPCODE_DP3 =	86,
+   BRW_OPCODE_DP2 =	87,
+   BRW_OPCODE_DPA2 =	88,
+   BRW_OPCODE_LINE =	89,
+   BRW_OPCODE_PLN =	90,
+   BRW_OPCODE_NOP =	126,
+
+   /* These are compiler backend opcodes that get translated into other
+    * instructions.
+    */
+   FS_OPCODE_FB_WRITE = 128,
+   FS_OPCODE_RCP,
+   FS_OPCODE_RSQ,
+   FS_OPCODE_SQRT,
+   FS_OPCODE_EXP2,
+   FS_OPCODE_LOG2,
+   FS_OPCODE_POW,
+   FS_OPCODE_SIN,
+   FS_OPCODE_COS,
+   FS_OPCODE_DDX,
+   FS_OPCODE_DDY,
+   FS_OPCODE_PIXEL_X,
+   FS_OPCODE_PIXEL_Y,
+   FS_OPCODE_CINTERP,
+   FS_OPCODE_LINTERP,
+   FS_OPCODE_TEX,
+   FS_OPCODE_TXB,
+   FS_OPCODE_TXD,
+   FS_OPCODE_TXL,
+   FS_OPCODE_DISCARD,
+   FS_OPCODE_SPILL,
+   FS_OPCODE_UNSPILL,
+   FS_OPCODE_PULL_CONSTANT_LOAD,
+};
 
 #define BRW_PREDICATE_NONE             0
 #define BRW_PREDICATE_NORMAL           1
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index cafb7092ac8..a0d75cc6f96 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -522,7 +522,7 @@ fs_visitor::emit_frontfacing_interpolation(ir_variable *ir)
 }
 
 fs_inst *
-fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
+fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src)
 {
    switch (opcode) {
    case FS_OPCODE_RCP:
@@ -565,7 +565,7 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
 }
 
 fs_inst *
-fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1)
+fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
 {
    int base_mrf = 2;
    fs_inst *inst;
@@ -1149,6 +1149,9 @@ fs_visitor::propagate_constants()
 		  progress = true;
 	       }
 	       break;
+
+	    default:
+	       break;
 	    }
 	 }
 
@@ -1200,6 +1203,8 @@ fs_visitor::opt_algebraic()
 	    break;
 	 }
 
+	 break;
+      default:
 	 break;
       }
    }
@@ -1267,6 +1272,8 @@ fs_visitor::register_coalesce()
       case BRW_OPCODE_ENDIF:
 	 if_depth--;
 	 break;
+      default:
+	 break;
       }
       if (loop_depth || if_depth)
 	 continue;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 4ec649014de..d207ac27aa2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -25,6 +25,8 @@
  *
  */
 
+#include "brw_shader.h"
+
 extern "C" {
 
 #include <sys/types.h>
@@ -55,33 +57,6 @@ enum register_file {
    BAD_FILE
 };
 
-enum fs_opcodes {
-   FS_OPCODE_FB_WRITE = 256,
-   FS_OPCODE_RCP,
-   FS_OPCODE_RSQ,
-   FS_OPCODE_SQRT,
-   FS_OPCODE_EXP2,
-   FS_OPCODE_LOG2,
-   FS_OPCODE_POW,
-   FS_OPCODE_SIN,
-   FS_OPCODE_COS,
-   FS_OPCODE_DDX,
-   FS_OPCODE_DDY,
-   FS_OPCODE_PIXEL_X,
-   FS_OPCODE_PIXEL_Y,
-   FS_OPCODE_CINTERP,
-   FS_OPCODE_LINTERP,
-   FS_OPCODE_TEX,
-   FS_OPCODE_TXB,
-   FS_OPCODE_TXD,
-   FS_OPCODE_TXL,
-   FS_OPCODE_DISCARD,
-   FS_OPCODE_SPILL,
-   FS_OPCODE_UNSPILL,
-   FS_OPCODE_PULL_CONSTANT_LOAD,
-};
-
-
 class fs_reg {
 public:
    /* Callers of this ralloc-based new need not call delete. It's
@@ -227,13 +202,13 @@ public:
       init();
    }
 
-   fs_inst(int opcode)
+   fs_inst(enum opcode opcode)
    {
       init();
       this->opcode = opcode;
    }
 
-   fs_inst(int opcode, fs_reg dst)
+   fs_inst(enum opcode opcode, fs_reg dst)
    {
       init();
       this->opcode = opcode;
@@ -243,7 +218,7 @@ public:
 	 assert(dst.reg_offset >= 0);
    }
 
-   fs_inst(int opcode, fs_reg dst, fs_reg src0)
+   fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0)
    {
       init();
       this->opcode = opcode;
@@ -256,7 +231,7 @@ public:
 	 assert(src[0].reg_offset >= 0);
    }
 
-   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
+   fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
    {
       init();
       this->opcode = opcode;
@@ -272,7 +247,7 @@ public:
 	 assert(src[1].reg_offset >= 0);
    }
 
-   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
+   fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
    {
       init();
       this->opcode = opcode;
@@ -331,7 +306,7 @@ public:
 	      opcode == FS_OPCODE_POW);
    }
 
-   int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
+   enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
    fs_reg dst;
    fs_reg src[3];
    bool saturate;
@@ -448,27 +423,28 @@ public:
 
    fs_inst *emit(fs_inst inst);
 
-   fs_inst *emit(int opcode)
+   fs_inst *emit(enum opcode opcode)
    {
       return emit(fs_inst(opcode));
    }
 
-   fs_inst *emit(int opcode, fs_reg dst)
+   fs_inst *emit(enum opcode opcode, fs_reg dst)
    {
       return emit(fs_inst(opcode, dst));
    }
 
-   fs_inst *emit(int opcode, fs_reg dst, fs_reg src0)
+   fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0)
    {
       return emit(fs_inst(opcode, dst, src0));
    }
 
-   fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
+   fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
    {
       return emit(fs_inst(opcode, dst, src0, src1));
    }
 
-   fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
+   fs_inst *emit(enum opcode opcode, fs_reg dst,
+		 fs_reg src0, fs_reg src1, fs_reg src2)
    {
       return emit(fs_inst(opcode, dst, src0, src1, src2));
    }
@@ -529,8 +505,8 @@ public:
 			      int sampler);
    fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
 			      int sampler);
-   fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0);
-   fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1);
+   fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
+   fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
    bool try_emit_saturate(ir_expression *ir);
    void emit_bool_to_cond_code(ir_rvalue *condition);
    void emit_if_gen6(ir_if *ir);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index e168e541bef..529df0880f0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -277,6 +277,9 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
 	 /* There is no sample_d_c message; comparisons are done manually */
 	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
 	 break;
+      default:
+	 assert(!"not reached");
+	 break;
       }
    } else {
       switch (inst->opcode) {
@@ -317,6 +320,9 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
 	 assert(inst->mlen == 7 || inst->mlen == 10);
 	 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
 	 break;
+      default:
+	 assert(!"not reached");
+	 break;
       }
    }
    assert(msg_type != -1);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 5c9cba99ae5..7c5414ac26c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -25,23 +25,6 @@
  *
  */
 
-extern "C" {
-
-#include <sys/types.h>
-
-#include "main/macros.h"
-#include "main/shaderobj.h"
-#include "main/uniforms.h"
-#include "program/prog_parameter.h"
-#include "program/prog_print.h"
-#include "program/prog_optimize.h"
-#include "program/register_allocate.h"
-#include "program/sampler.h"
-#include "program/hash_table.h"
-#include "brw_context.h"
-#include "brw_eu.h"
-#include "brw_wm.h"
-}
 #include "brw_fs.h"
 #include "../glsl/glsl_types.h"
 #include "../glsl/ir_optimization.h"
@@ -359,6 +342,9 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
 	 if (inst->dst.file == GRF)
 	    no_spill[inst->dst.reg] = true;
 	 break;
+
+      default:
+	 break;
       }
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index f1a88fcfa79..965a5b333a2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -25,21 +25,6 @@
  *
  */
 
-extern "C" {
-
-#include <sys/types.h>
-
-#include "main/macros.h"
-#include "main/shaderobj.h"
-#include "main/uniforms.h"
-#include "program/prog_optimize.h"
-#include "program/register_allocate.h"
-#include "program/sampler.h"
-#include "program/hash_table.h"
-#include "brw_context.h"
-#include "brw_eu.h"
-#include "brw_wm.h"
-}
 #include "brw_fs.h"
 #include "../glsl/glsl_types.h"
 #include "../glsl/ir_optimization.h"
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 4c568a26caa..21671d1c8d6 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -21,5 +21,9 @@
  * IN THE SOFTWARE.
  */
 
+#include <stdint.h>
+
+#pragma once
+
 int brw_type_for_base_type(const struct glsl_type *type);
 uint32_t brw_conditional_for_comparison(unsigned int op);

From 65b5cbbcf783f6c668ab5b31a0734680dd396794 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 12:38:58 -0700
Subject: [PATCH 355/600] i965: Rename math FS_OPCODE_* to SHADER_OPCODE_*.

I want to just use the same enums in the VS.
---
 src/mesa/drivers/dri/i965/brw_defines.h       | 16 ++++-----
 src/mesa/drivers/dri/i965/brw_fs.cpp          | 34 +++++++++----------
 src/mesa/drivers/dri/i965/brw_fs.h            | 16 ++++-----
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp     | 34 +++++++++----------
 .../dri/i965/brw_fs_schedule_instructions.cpp | 16 ++++-----
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp  | 20 +++++------
 6 files changed, 68 insertions(+), 68 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index fe5d29c4328..da8d016da42 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -616,14 +616,14 @@ enum opcode {
     * instructions.
     */
    FS_OPCODE_FB_WRITE = 128,
-   FS_OPCODE_RCP,
-   FS_OPCODE_RSQ,
-   FS_OPCODE_SQRT,
-   FS_OPCODE_EXP2,
-   FS_OPCODE_LOG2,
-   FS_OPCODE_POW,
-   FS_OPCODE_SIN,
-   FS_OPCODE_COS,
+   SHADER_OPCODE_RCP,
+   SHADER_OPCODE_RSQ,
+   SHADER_OPCODE_SQRT,
+   SHADER_OPCODE_EXP2,
+   SHADER_OPCODE_LOG2,
+   SHADER_OPCODE_POW,
+   SHADER_OPCODE_SIN,
+   SHADER_OPCODE_COS,
    FS_OPCODE_DDX,
    FS_OPCODE_DDY,
    FS_OPCODE_PIXEL_X,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index a0d75cc6f96..693ef0ce31a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -143,15 +143,15 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
       return 0;
 
    switch (inst->opcode) {
-   case FS_OPCODE_RCP:
-   case FS_OPCODE_RSQ:
-   case FS_OPCODE_SQRT:
-   case FS_OPCODE_EXP2:
-   case FS_OPCODE_LOG2:
-   case FS_OPCODE_SIN:
-   case FS_OPCODE_COS:
+   case SHADER_OPCODE_RCP:
+   case SHADER_OPCODE_RSQ:
+   case SHADER_OPCODE_SQRT:
+   case SHADER_OPCODE_EXP2:
+   case SHADER_OPCODE_LOG2:
+   case SHADER_OPCODE_SIN:
+   case SHADER_OPCODE_COS:
       return 1 * c->dispatch_width / 8;
-   case FS_OPCODE_POW:
+   case SHADER_OPCODE_POW:
       return 2 * c->dispatch_width / 8;
    case FS_OPCODE_TEX:
    case FS_OPCODE_TXB:
@@ -525,13 +525,13 @@ fs_inst *
 fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src)
 {
    switch (opcode) {
-   case FS_OPCODE_RCP:
-   case FS_OPCODE_RSQ:
-   case FS_OPCODE_SQRT:
-   case FS_OPCODE_EXP2:
-   case FS_OPCODE_LOG2:
-   case FS_OPCODE_SIN:
-   case FS_OPCODE_COS:
+   case SHADER_OPCODE_RCP:
+   case SHADER_OPCODE_RSQ:
+   case SHADER_OPCODE_SQRT:
+   case SHADER_OPCODE_EXP2:
+   case SHADER_OPCODE_LOG2:
+   case SHADER_OPCODE_SIN:
+   case SHADER_OPCODE_COS:
       break;
    default:
       assert(!"not reached: bad math opcode");
@@ -570,7 +570,7 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
    int base_mrf = 2;
    fs_inst *inst;
 
-   assert(opcode == FS_OPCODE_POW);
+   assert(opcode == SHADER_OPCODE_POW);
 
    if (intel->gen >= 6) {
       /* Can't do hstride == 0 args to gen6 math, so expand it out.
@@ -1135,7 +1135,7 @@ fs_visitor::propagate_constants()
 	       }
 	       break;
 
-	    case FS_OPCODE_RCP:
+	    case SHADER_OPCODE_RCP:
 	       /* The hardware doesn't do math on immediate values
 		* (because why are you doing that, seriously?), but
 		* the correct answer is to just constant fold it
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index d207ac27aa2..94af0e1af16 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -296,14 +296,14 @@ public:
 
    bool is_math()
    {
-      return (opcode == FS_OPCODE_RCP ||
-	      opcode == FS_OPCODE_RSQ ||
-	      opcode == FS_OPCODE_SQRT ||
-	      opcode == FS_OPCODE_EXP2 ||
-	      opcode == FS_OPCODE_LOG2 ||
-	      opcode == FS_OPCODE_SIN ||
-	      opcode == FS_OPCODE_COS ||
-	      opcode == FS_OPCODE_POW);
+      return (opcode == SHADER_OPCODE_RCP ||
+	      opcode == SHADER_OPCODE_RSQ ||
+	      opcode == SHADER_OPCODE_SQRT ||
+	      opcode == SHADER_OPCODE_EXP2 ||
+	      opcode == SHADER_OPCODE_LOG2 ||
+	      opcode == SHADER_OPCODE_SIN ||
+	      opcode == SHADER_OPCODE_COS ||
+	      opcode == SHADER_OPCODE_POW);
    }
 
    enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 529df0880f0..285ba46bd46 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -149,28 +149,28 @@ fs_visitor::generate_math(fs_inst *inst,
    int op;
 
    switch (inst->opcode) {
-   case FS_OPCODE_RCP:
+   case SHADER_OPCODE_RCP:
       op = BRW_MATH_FUNCTION_INV;
       break;
-   case FS_OPCODE_RSQ:
+   case SHADER_OPCODE_RSQ:
       op = BRW_MATH_FUNCTION_RSQ;
       break;
-   case FS_OPCODE_SQRT:
+   case SHADER_OPCODE_SQRT:
       op = BRW_MATH_FUNCTION_SQRT;
       break;
-   case FS_OPCODE_EXP2:
+   case SHADER_OPCODE_EXP2:
       op = BRW_MATH_FUNCTION_EXP;
       break;
-   case FS_OPCODE_LOG2:
+   case SHADER_OPCODE_LOG2:
       op = BRW_MATH_FUNCTION_LOG;
       break;
-   case FS_OPCODE_POW:
+   case SHADER_OPCODE_POW:
       op = BRW_MATH_FUNCTION_POW;
       break;
-   case FS_OPCODE_SIN:
+   case SHADER_OPCODE_SIN:
       op = BRW_MATH_FUNCTION_SIN;
       break;
-   case FS_OPCODE_COS:
+   case SHADER_OPCODE_COS:
       op = BRW_MATH_FUNCTION_COS;
       break;
    default:
@@ -182,7 +182,7 @@ fs_visitor::generate_math(fs_inst *inst,
    if (intel->gen >= 6) {
       assert(inst->mlen == 0);
 
-      if (inst->opcode == FS_OPCODE_POW) {
+      if (inst->opcode == SHADER_OPCODE_POW) {
 	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 	 brw_math2(p, dst, op, src[0], src[1]);
 
@@ -775,14 +775,14 @@ fs_visitor::generate_code()
       }
 	 break;
 
-      case FS_OPCODE_RCP:
-      case FS_OPCODE_RSQ:
-      case FS_OPCODE_SQRT:
-      case FS_OPCODE_EXP2:
-      case FS_OPCODE_LOG2:
-      case FS_OPCODE_POW:
-      case FS_OPCODE_SIN:
-      case FS_OPCODE_COS:
+      case SHADER_OPCODE_RCP:
+      case SHADER_OPCODE_RSQ:
+      case SHADER_OPCODE_SQRT:
+      case SHADER_OPCODE_EXP2:
+      case SHADER_OPCODE_LOG2:
+      case SHADER_OPCODE_POW:
+      case SHADER_OPCODE_SIN:
+      case SHADER_OPCODE_COS:
 	 generate_math(inst, dst, src);
 	 break;
       case FS_OPCODE_PIXEL_X:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index 965a5b333a2..0ea4e5c36f0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -69,26 +69,26 @@ public:
       int math_latency = 22;
 
       switch (inst->opcode) {
-      case FS_OPCODE_RCP:
+      case SHADER_OPCODE_RCP:
 	 this->latency = 1 * chans * math_latency;
 	 break;
-      case FS_OPCODE_RSQ:
+      case SHADER_OPCODE_RSQ:
 	 this->latency = 2 * chans * math_latency;
 	 break;
-      case FS_OPCODE_SQRT:
-      case FS_OPCODE_LOG2:
+      case SHADER_OPCODE_SQRT:
+      case SHADER_OPCODE_LOG2:
 	 /* full precision log.  partial is 2. */
 	 this->latency = 3 * chans * math_latency;
 	 break;
-      case FS_OPCODE_EXP2:
+      case SHADER_OPCODE_EXP2:
 	 /* full precision.  partial is 3, same throughput. */
 	 this->latency = 4 * chans * math_latency;
 	 break;
-      case FS_OPCODE_POW:
+      case SHADER_OPCODE_POW:
 	 this->latency = 8 * chans * math_latency;
 	 break;
-      case FS_OPCODE_SIN:
-      case FS_OPCODE_COS:
+      case SHADER_OPCODE_SIN:
+      case SHADER_OPCODE_COS:
 	 /* minimum latency, max is 12 rounds. */
 	 this->latency = 5 * chans * math_latency;
 	 break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 2e3f9be75b4..8b4f5bbac15 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -250,14 +250,14 @@ fs_visitor::visit(ir_expression *ir)
 
       break;
    case ir_unop_rcp:
-      emit_math(FS_OPCODE_RCP, this->result, op[0]);
+      emit_math(SHADER_OPCODE_RCP, this->result, op[0]);
       break;
 
    case ir_unop_exp2:
-      emit_math(FS_OPCODE_EXP2, this->result, op[0]);
+      emit_math(SHADER_OPCODE_EXP2, this->result, op[0]);
       break;
    case ir_unop_log2:
-      emit_math(FS_OPCODE_LOG2, this->result, op[0]);
+      emit_math(SHADER_OPCODE_LOG2, this->result, op[0]);
       break;
    case ir_unop_exp:
    case ir_unop_log:
@@ -265,11 +265,11 @@ fs_visitor::visit(ir_expression *ir)
       break;
    case ir_unop_sin:
    case ir_unop_sin_reduced:
-      emit_math(FS_OPCODE_SIN, this->result, op[0]);
+      emit_math(SHADER_OPCODE_SIN, this->result, op[0]);
       break;
    case ir_unop_cos:
    case ir_unop_cos_reduced:
-      emit_math(FS_OPCODE_COS, this->result, op[0]);
+      emit_math(SHADER_OPCODE_COS, this->result, op[0]);
       break;
 
    case ir_unop_dFdx:
@@ -340,11 +340,11 @@ fs_visitor::visit(ir_expression *ir)
       break;
 
    case ir_unop_sqrt:
-      emit_math(FS_OPCODE_SQRT, this->result, op[0]);
+      emit_math(SHADER_OPCODE_SQRT, this->result, op[0]);
       break;
 
    case ir_unop_rsq:
-      emit_math(FS_OPCODE_RSQ, this->result, op[0]);
+      emit_math(SHADER_OPCODE_RSQ, this->result, op[0]);
       break;
 
    case ir_unop_i2u:
@@ -423,7 +423,7 @@ fs_visitor::visit(ir_expression *ir)
       break;
 
    case ir_binop_pow:
-      emit_math(FS_OPCODE_POW, this->result, op[0], op[1]);
+      emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
       break;
 
    case ir_unop_bit_not:
@@ -1694,7 +1694,7 @@ fs_visitor::emit_interpolation_setup_gen4()
 	interp_reg(FRAG_ATTRIB_WPOS, 3));
    /* Compute the pixel 1/W value from wpos.w. */
    this->pixel_w = fs_reg(this, glsl_type::float_type);
-   emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);
+   emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
    this->current_annotation = NULL;
 }
 
@@ -1731,7 +1731,7 @@ fs_visitor::emit_interpolation_setup_gen6()
    this->current_annotation = "compute pos.w";
    this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
    this->wpos_w = fs_reg(this, glsl_type::float_type);
-   emit_math(FS_OPCODE_RCP, this->wpos_w, this->pixel_w);
+   emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
 
    this->delta_x = fs_reg(brw_vec8_grf(2, 0));
    this->delta_y = fs_reg(brw_vec8_grf(3, 0));

From af3c9803d818fd33139f1247a387d64b967b8992 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 2 May 2011 09:45:40 -0700
Subject: [PATCH 356/600] i965: Start adding the VS visitor and codegen.

The low-level IR is a mashup of brw_fs.cpp and ir_to_mesa.cpp.  It's
currently controlled by the INTEL_NEW_VS=1 environment variable, and
only tested for the trivial "gl_Position = gl_Vertex;" shader so far.
---
 src/mesa/drivers/dri/i965/Makefile            |    5 +-
 src/mesa/drivers/dri/i965/brw_context.h       |    2 +-
 src/mesa/drivers/dri/i965/brw_defines.h       |    2 +
 src/mesa/drivers/dri/i965/brw_eu.h            |    3 +
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp     |   33 +-
 src/mesa/drivers/dri/i965/brw_shader.cpp      |   26 +
 src/mesa/drivers/dri/i965/brw_shader.h        |    2 +
 src/mesa/drivers/dri/i965/brw_vec4.h          |  434 +++++
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp   |  568 ++++++
 .../dri/i965/brw_vec4_reg_allocate.cpp        |   77 +
 .../drivers/dri/i965/brw_vec4_visitor.cpp     | 1649 +++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vs.c            |   12 +-
 src/mesa/drivers/dri/i965/brw_vs.h            |    3 +-
 src/mesa/drivers/dri/i965/brw_vs_emit.c       |    2 +-
 14 files changed, 2781 insertions(+), 37 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_vec4.h
 create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
 create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
 create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp

diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index 44f28cd9d15..45a5350a383 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -124,7 +124,10 @@ CXX_SOURCES = \
 	brw_fs_reg_allocate.cpp \
 	brw_fs_schedule_instructions.cpp \
 	brw_fs_vector_splitting.cpp \
-	brw_shader.cpp
+	brw_shader.cpp \
+	brw_vec4_emit.cpp \
+	brw_vec4_reg_allocate.cpp \
+	brw_vec4_visitor.cpp
 
 ASM_SOURCES = 
 
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index cc11d06874d..7b6b64c1a5c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -529,7 +529,7 @@ struct brw_context
        * the CURBE, the depth buffer, and a query BO.
        */
       drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16];
-      int validated_bo_count;
+      unsigned int validated_bo_count;
    } state;
 
    struct brw_cache cache;
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index da8d016da42..e3823c65d1a 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -638,6 +638,8 @@ enum opcode {
    FS_OPCODE_SPILL,
    FS_OPCODE_UNSPILL,
    FS_OPCODE_PULL_CONSTANT_LOAD,
+
+   VS_OPCODE_URB_WRITE,
 };
 
 #define BRW_PREDICATE_NONE             0
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 72d50eadbce..38dd99b693d 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -44,6 +44,9 @@
 #define BRW_SWIZZLE_NOOP      BRW_SWIZZLE4(0,1,2,3)
 #define BRW_SWIZZLE_XYZW      BRW_SWIZZLE4(0,1,2,3)
 #define BRW_SWIZZLE_XXXX      BRW_SWIZZLE4(0,0,0,0)
+#define BRW_SWIZZLE_YYYY      BRW_SWIZZLE4(1,1,1,1)
+#define BRW_SWIZZLE_ZZZZ      BRW_SWIZZLE4(2,2,2,2)
+#define BRW_SWIZZLE_WWWW      BRW_SWIZZLE4(3,3,3,3)
 #define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 285ba46bd46..7367ccaa7e0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -146,38 +146,7 @@ void
 fs_visitor::generate_math(fs_inst *inst,
 			  struct brw_reg dst, struct brw_reg *src)
 {
-   int op;
-
-   switch (inst->opcode) {
-   case SHADER_OPCODE_RCP:
-      op = BRW_MATH_FUNCTION_INV;
-      break;
-   case SHADER_OPCODE_RSQ:
-      op = BRW_MATH_FUNCTION_RSQ;
-      break;
-   case SHADER_OPCODE_SQRT:
-      op = BRW_MATH_FUNCTION_SQRT;
-      break;
-   case SHADER_OPCODE_EXP2:
-      op = BRW_MATH_FUNCTION_EXP;
-      break;
-   case SHADER_OPCODE_LOG2:
-      op = BRW_MATH_FUNCTION_LOG;
-      break;
-   case SHADER_OPCODE_POW:
-      op = BRW_MATH_FUNCTION_POW;
-      break;
-   case SHADER_OPCODE_SIN:
-      op = BRW_MATH_FUNCTION_SIN;
-      break;
-   case SHADER_OPCODE_COS:
-      op = BRW_MATH_FUNCTION_COS;
-      break;
-   default:
-      assert(!"not reached: unknown math function");
-      op = 0;
-      break;
-   }
+   int op = brw_math_function(inst->opcode);
 
    if (intel->gen >= 6) {
       assert(inst->mlen == 0);
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index f4005f80055..2eeeec25cac 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -199,3 +199,29 @@ brw_conditional_for_comparison(unsigned int op)
       return BRW_CONDITIONAL_NZ;
    }
 }
+
+uint32_t
+brw_math_function(enum opcode op)
+{
+   switch (op) {
+   case SHADER_OPCODE_RCP:
+      return BRW_MATH_FUNCTION_INV;
+   case SHADER_OPCODE_RSQ:
+      return BRW_MATH_FUNCTION_RSQ;
+   case SHADER_OPCODE_SQRT:
+      return BRW_MATH_FUNCTION_SQRT;
+   case SHADER_OPCODE_EXP2:
+      return BRW_MATH_FUNCTION_EXP;
+   case SHADER_OPCODE_LOG2:
+      return BRW_MATH_FUNCTION_LOG;
+   case SHADER_OPCODE_POW:
+      return BRW_MATH_FUNCTION_POW;
+   case SHADER_OPCODE_SIN:
+      return BRW_MATH_FUNCTION_SIN;
+   case SHADER_OPCODE_COS:
+      return BRW_MATH_FUNCTION_COS;
+   default:
+      assert(!"not reached: unknown math function");
+      return 0;
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 21671d1c8d6..1054d7a589e 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -22,8 +22,10 @@
  */
 
 #include <stdint.h>
+#include "brw_defines.h"
 
 #pragma once
 
 int brw_type_for_base_type(const struct glsl_type *type);
 uint32_t brw_conditional_for_comparison(unsigned int op);
+uint32_t brw_math_function(enum opcode op);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
new file mode 100644
index 00000000000..10168fc1cb0
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -0,0 +1,434 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_VEC4_H
+#define BRW_VEC4_H
+
+#include <stdint.h>
+#include "brw_shader.h"
+#include "main/compiler.h"
+#include "program/hash_table.h"
+
+extern "C" {
+#include "brw_vs.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+};
+
+#include "../glsl/ir.h"
+
+namespace brw {
+
+class dst_reg;
+
+/**
+ * Common helper for constructing swizzles.  When only a subset of
+ * channels of a vec4 are used, we don't want to reference the other
+ * channels, as that will tell optimization passes that those other
+ * channels are used.
+ */
+static int
+swizzle_for_size(int size)
+{
+   int size_swizzles[4] = {
+      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
+      BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
+   };
+
+   assert((size >= 1) && (size <= 4));
+   return size_swizzles[size - 1];
+}
+
+enum register_file {
+   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
+   GRF = BRW_GENERAL_REGISTER_FILE,
+   MRF = BRW_MESSAGE_REGISTER_FILE,
+   IMM = BRW_IMMEDIATE_VALUE,
+   HW_REG, /* a struct brw_reg */
+   ATTR,
+   UNIFORM, /* prog_data->params[hw_reg] */
+   BAD_FILE
+};
+
+class reg
+{
+public:
+   /** Register file: ARF, GRF, MRF, IMM. */
+   enum register_file file;
+   /** virtual register number.  0 = fixed hw reg */
+   int reg;
+   /** Offset within the virtual register. */
+   int reg_offset;
+   /** Register type.  BRW_REGISTER_TYPE_* */
+   int type;
+   bool sechalf;
+   struct brw_reg fixed_hw_reg;
+   int smear; /* -1, or a channel of the reg to smear to all channels. */
+
+   /** Value for file == BRW_IMMMEDIATE_FILE */
+   union {
+      int32_t i;
+      uint32_t u;
+      float f;
+   } imm;
+};
+
+class src_reg : public reg
+{
+public:
+   /* Callers of this ralloc-based new need not call delete. It's
+    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      node = ralloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   void init()
+   {
+      memset(this, 0, sizeof(*this));
+
+      this->file = BAD_FILE;
+   }
+
+   src_reg(register_file file, int reg, const glsl_type *type)
+   {
+      init();
+
+      this->file = file;
+      this->reg = reg;
+      if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
+	 this->swizzle = swizzle_for_size(type->vector_elements);
+      else
+	 this->swizzle = SWIZZLE_XYZW;
+   }
+
+   /** Generic unset register constructor. */
+   src_reg()
+   {
+      init();
+   }
+
+   src_reg(float f)
+   {
+      init();
+
+      this->file = IMM;
+      this->type = BRW_REGISTER_TYPE_F;
+      this->imm.f = f;
+   }
+
+   src_reg(uint32_t u)
+   {
+      init();
+
+      this->file = IMM;
+      this->type = BRW_REGISTER_TYPE_UD;
+      this->imm.f = u;
+   }
+
+   src_reg(int32_t i)
+   {
+      init();
+
+      this->file = IMM;
+      this->type = BRW_REGISTER_TYPE_D;
+      this->imm.i = i;
+   }
+
+   src_reg(class vec4_visitor *v, const struct glsl_type *type);
+
+   explicit src_reg(dst_reg reg);
+
+   GLuint swizzle; /**< SWIZZLE_XYZW swizzles from Mesa. */
+   bool negate;
+   bool abs;
+};
+
+class dst_reg : public reg
+{
+public:
+   /* Callers of this ralloc-based new need not call delete. It's
+    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      node = ralloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   void init()
+   {
+      memset(this, 0, sizeof(*this));
+      this->file = BAD_FILE;
+      this->writemask = WRITEMASK_XYZW;
+   }
+
+   dst_reg()
+   {
+      init();
+   }
+
+   dst_reg(register_file file, int reg)
+   {
+      init();
+
+      this->file = file;
+      this->reg = reg;
+   }
+
+   dst_reg(struct brw_reg reg)
+   {
+      init();
+
+      this->file = HW_REG;
+      this->fixed_hw_reg = reg;
+   }
+
+   dst_reg(class vec4_visitor *v, const struct glsl_type *type);
+
+   explicit dst_reg(src_reg reg);
+
+   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
+};
+
+class vec4_instruction : public exec_node {
+public:
+   /* Callers of this ralloc-based new need not call delete. It's
+    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      node = rzalloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   struct brw_reg get_dst(void);
+   struct brw_reg get_src(int i);
+
+   enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
+   dst_reg dst;
+   src_reg src[3];
+
+   bool saturate;
+   bool predicate_inverse;
+   uint32_t predicate;
+
+   int conditional_mod; /**< BRW_CONDITIONAL_* */
+
+   int sampler;
+   int target; /**< MRT target. */
+   bool shadow_compare;
+
+   bool eot;
+   bool header_present;
+   int mlen; /**< SEND message length */
+   int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
+
+   uint32_t offset; /* spill/unspill offset */
+   /** @{
+    * Annotation for the generated IR.  One of the two can be set.
+    */
+   ir_instruction *ir;
+   const char *annotation;
+};
+
+class vec4_visitor : public ir_visitor
+{
+public:
+   vec4_visitor(struct brw_vs_compile *c,
+		struct gl_shader_program *prog, struct brw_shader *shader);
+   ~vec4_visitor();
+
+   dst_reg dst_null_f()
+   {
+      return dst_reg(brw_null_reg());
+   }
+
+   dst_reg dst_null_d()
+   {
+      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+   }
+
+   dst_reg dst_null_cmp()
+   {
+      if (intel->gen > 4)
+	 return dst_null_d();
+      else
+	 return dst_null_f();
+   }
+
+   struct brw_context *brw;
+   const struct gl_vertex_program *vp;
+   struct intel_context *intel;
+   struct gl_context *ctx;
+   struct brw_vs_compile *c;
+   struct brw_vs_prog_data *prog_data;
+   struct brw_compile *p;
+   struct brw_shader *shader;
+   struct gl_shader_program *prog;
+   void *mem_ctx;
+   exec_list instructions;
+
+   char *fail_msg;
+   bool failed;
+
+   /**
+    * GLSL IR currently being processed, which is associated with our
+    * driver IR instructions for debugging purposes.
+    */
+   ir_instruction *base_ir;
+   const char *current_annotation;
+
+   int *virtual_grf_sizes;
+   int virtual_grf_count;
+   int virtual_grf_array_size;
+   int first_non_payload_grf;
+
+   dst_reg *variable_storage(ir_variable *var);
+
+   void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
+
+   src_reg src_reg_for_float(float val);
+
+   /**
+    * \name Visit methods
+    *
+    * As typical for the visitor pattern, there must be one \c visit method for
+    * each concrete subclass of \c ir_instruction.  Virtual base classes within
+    * the hierarchy should not have \c visit methods.
+    */
+   /*@{*/
+   virtual void visit(ir_variable *);
+   virtual void visit(ir_loop *);
+   virtual void visit(ir_loop_jump *);
+   virtual void visit(ir_function_signature *);
+   virtual void visit(ir_function *);
+   virtual void visit(ir_expression *);
+   virtual void visit(ir_swizzle *);
+   virtual void visit(ir_dereference_variable  *);
+   virtual void visit(ir_dereference_array *);
+   virtual void visit(ir_dereference_record *);
+   virtual void visit(ir_assignment *);
+   virtual void visit(ir_constant *);
+   virtual void visit(ir_call *);
+   virtual void visit(ir_return *);
+   virtual void visit(ir_discard *);
+   virtual void visit(ir_texture *);
+   virtual void visit(ir_if *);
+   /*@}*/
+
+   src_reg result;
+
+   /* Regs for vertex results.  Generated at ir_variable visiting time
+    * for the ir->location's used.
+    */
+   dst_reg output_reg[VERT_RESULT_MAX];
+
+   struct hash_table *variable_ht;
+
+   bool run(void);
+   void fail(const char *msg, ...);
+
+   int virtual_grf_alloc(int size);
+   int setup_attributes(int payload_reg);
+   void setup_payload();
+   void reg_allocate_trivial();
+   void reg_allocate();
+
+   vec4_instruction *emit(enum opcode opcode);
+
+   vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0);
+
+   vec4_instruction *emit(enum opcode opcode, dst_reg dst,
+			  src_reg src0, src_reg src1);
+
+   vec4_instruction *emit(enum opcode opcode, dst_reg dst,
+			  src_reg src0, src_reg src1, src_reg src2);
+
+   /** Walks an exec_list of ir_instruction and sends it through this visitor. */
+   void visit_instructions(const exec_list *list);
+
+   void emit_bool_to_cond_code(ir_rvalue *ir);
+   void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1);
+   void emit_if_gen6(ir_if *ir);
+
+   void emit_block_move(ir_assignment *ir);
+
+   /**
+    * Emit the correct dot-product instruction for the type of arguments
+    */
+   void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
+
+   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
+		    dst_reg dst, src_reg src0);
+
+   void emit_scalar(ir_instruction *ir, enum prog_opcode op,
+		    dst_reg dst, src_reg src0, src_reg src1);
+
+   void emit_scs(ir_instruction *ir, enum prog_opcode op,
+		 dst_reg dst, const src_reg &src);
+
+   void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src);
+   void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src);
+   void emit_math(enum opcode opcode, dst_reg dst, src_reg src);
+   void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
+   void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
+   void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
+
+   int emit_vue_header_gen6(int header_mrf);
+   int emit_vue_header_gen4(int header_mrf);
+   void emit_urb_writes(void);
+
+   GLboolean try_emit_sat(ir_expression *ir);
+
+   bool process_move_condition(ir_rvalue *ir);
+
+   void generate_code();
+   void generate_vs_instruction(vec4_instruction *inst,
+				struct brw_reg dst,
+				struct brw_reg *src);
+   void generate_math1_gen4(vec4_instruction *inst,
+			    struct brw_reg dst,
+			    struct brw_reg src);
+   void generate_math1_gen6(vec4_instruction *inst,
+			    struct brw_reg dst,
+			    struct brw_reg src);
+   void generate_urb_write(vec4_instruction *inst);
+};
+
+} /* namespace brw */
+
+#endif /* BRW_VEC4_H */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
new file mode 100644
index 00000000000..bdc7a79d83d
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -0,0 +1,568 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+#include "../glsl/ir_print_visitor.h"
+
+extern "C" {
+#include "brw_eu.h"
+};
+
+using namespace brw;
+
+namespace brw {
+
+int
+vec4_visitor::setup_attributes(int payload_reg)
+{
+   int nr_attributes;
+   int attribute_map[VERT_ATTRIB_MAX];
+
+   nr_attributes = 0;
+   for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
+      if (prog_data->inputs_read & BITFIELD64_BIT(i)) {
+	 attribute_map[i] = payload_reg + nr_attributes;
+	 nr_attributes++;
+      }
+   }
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)iter.get();
+
+      for (int i = 0; i < 3; i++) {
+	 if (inst->src[i].file != ATTR)
+	    continue;
+
+	 inst->src[i].file = HW_REG;
+	 inst->src[i].fixed_hw_reg = brw_vec8_grf(attribute_map[inst->src[i].reg], 0);
+	 inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle;
+      }
+   }
+
+   /* The BSpec says we always have to read at least one thing from
+    * the VF, and it appears that the hardware wedges otherwise.
+    */
+   if (nr_attributes == 0)
+      nr_attributes = 1;
+
+   prog_data->urb_read_length = (nr_attributes + 1) / 2;
+
+   return nr_attributes;
+}
+
+void
+vec4_visitor::setup_payload(void)
+{
+   int reg = 0;
+
+   /* r0 is always reserved, as it contains the payload with the URB
+    * handles that are passed on to the URB write at the end of the
+    * thread.
+    */
+   reg++;
+
+   /* User clip planes from curbe:
+    */
+   if (c->key.nr_userclip) {
+      if (intel->gen >= 6) {
+	 for (int i = 0; i < c->key.nr_userclip; i++) {
+	    c->userplane[i] = stride(brw_vec4_grf(reg + i / 2,
+						  (i % 2) * 4), 0, 4, 1);
+	 }
+	 reg += ALIGN(c->key.nr_userclip, 2) / 2;
+      } else {
+	 for (int i = 0; i < c->key.nr_userclip; i++) {
+	    c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2,
+						  (i % 2) * 4), 0, 4, 1);
+	 }
+	 reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2;
+      }
+   }
+
+   /* FINISHME: push constants */
+   c->prog_data.curb_read_length = reg - 1;
+   c->prog_data.nr_params = 0;
+   /* XXX 0 causes a bug elsewhere... */
+   if (intel->gen < 6 && c->prog_data.nr_params == 0)
+      c->prog_data.nr_params = 4;
+
+   reg += setup_attributes(reg);
+
+   this->first_non_payload_grf = reg;
+}
+
+struct brw_reg
+vec4_instruction::get_dst(void)
+{
+   struct brw_reg brw_reg;
+
+   switch (dst.file) {
+   case GRF:
+      assert(dst.reg_offset == 0);
+      brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
+      brw_reg = retype(brw_reg, dst.type);
+      brw_reg.dw1.bits.writemask = dst.writemask;
+      break;
+
+   case HW_REG:
+      brw_reg = dst.fixed_hw_reg;
+      break;
+
+   case BAD_FILE:
+      brw_reg = brw_null_reg();
+      break;
+
+   default:
+      assert(!"not reached");
+      brw_reg = brw_null_reg();
+      break;
+   }
+   return brw_reg;
+}
+
+struct brw_reg
+vec4_instruction::get_src(int i)
+{
+   struct brw_reg brw_reg;
+
+   switch (src[i].file) {
+   case GRF:
+      brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0);
+      brw_reg = retype(brw_reg, src[i].type);
+      brw_reg.dw1.bits.swizzle = src[i].swizzle;
+      if (src[i].abs)
+	 brw_reg = brw_abs(brw_reg);
+      if (src[i].negate)
+	 brw_reg = negate(brw_reg);
+      break;
+
+   case IMM:
+      switch (src[i].type) {
+      case BRW_REGISTER_TYPE_F:
+	 brw_reg = brw_imm_f(src[i].imm.f);
+	 break;
+      case BRW_REGISTER_TYPE_D:
+	 brw_reg = brw_imm_d(src[i].imm.i);
+	 break;
+      case BRW_REGISTER_TYPE_UD:
+	 brw_reg = brw_imm_ud(src[i].imm.u);
+	 break;
+      default:
+	 assert(!"not reached");
+	 brw_reg = brw_null_reg();
+	 break;
+      }
+      break;
+
+   case HW_REG:
+      brw_reg = src[i].fixed_hw_reg;
+      break;
+
+   case BAD_FILE:
+      /* Probably unused. */
+      brw_reg = brw_null_reg();
+      break;
+   case ATTR:
+   default:
+      assert(!"not reached");
+      brw_reg = brw_null_reg();
+      break;
+   }
+
+   return brw_reg;
+}
+
+void
+vec4_visitor::generate_math1_gen4(vec4_instruction *inst,
+				  struct brw_reg dst,
+				  struct brw_reg src)
+{
+   brw_math(p,
+	    dst,
+	    brw_math_function(inst->opcode),
+	    BRW_MATH_SATURATE_NONE,
+	    inst->base_mrf,
+	    src,
+	    BRW_MATH_DATA_SCALAR,
+	    BRW_MATH_PRECISION_FULL);
+}
+
+void
+vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
+				  struct brw_reg dst,
+				  struct brw_reg src)
+{
+   brw_math(p,
+	    dst,
+	    brw_math_function(inst->opcode),
+	    BRW_MATH_SATURATE_NONE,
+	    inst->base_mrf,
+	    src,
+	    BRW_MATH_DATA_SCALAR,
+	    BRW_MATH_PRECISION_FULL);
+}
+
+void
+vec4_visitor::generate_urb_write(vec4_instruction *inst)
+{
+   brw_urb_WRITE(p,
+		 brw_null_reg(), /* dest */
+		 inst->base_mrf, /* starting mrf reg nr */
+		 brw_vec8_grf(0, 0), /* src */
+		 false,		/* allocate */
+		 true,		/* used */
+		 inst->mlen,
+		 0,		/* response len */
+		 inst->eot,	/* eot */
+		 inst->eot,	/* writes complete */
+		 inst->offset,	/* urb destination offset */
+		 BRW_URB_SWIZZLE_INTERLEAVE);
+}
+
+void
+vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
+				      struct brw_reg dst,
+				      struct brw_reg *src)
+{
+   vec4_instruction *inst = (vec4_instruction *)instruction;
+
+   switch (inst->opcode) {
+   case SHADER_OPCODE_RCP:
+   case SHADER_OPCODE_RSQ:
+   case SHADER_OPCODE_SQRT:
+   case SHADER_OPCODE_EXP2:
+   case SHADER_OPCODE_LOG2:
+   case SHADER_OPCODE_SIN:
+   case SHADER_OPCODE_COS:
+      if (intel->gen >= 6) {
+	 generate_math1_gen6(inst, dst, src[0]);
+      } else {
+	 generate_math1_gen4(inst, dst, src[0]);
+      }
+      break;
+
+   case SHADER_OPCODE_POW:
+      assert(!"finishme");
+      break;
+
+   case VS_OPCODE_URB_WRITE:
+      generate_urb_write(inst);
+      break;
+
+   default:
+      if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
+	 fail("unsupported opcode in `%s' in VS\n",
+	      brw_opcodes[inst->opcode].name);
+      } else {
+	 fail("Unsupported opcode %d in VS", inst->opcode);
+      }
+   }
+}
+
+bool
+vec4_visitor::run()
+{
+   /* Generate FS IR for main().  (the visitor only descends into
+    * functions called "main").
+    */
+   foreach_iter(exec_list_iterator, iter, *shader->ir) {
+      ir_instruction *ir = (ir_instruction *)iter.get();
+      base_ir = ir;
+      ir->accept(this);
+   }
+
+   emit_urb_writes();
+
+   if (failed)
+      return false;
+
+   setup_payload();
+   reg_allocate();
+
+   brw_set_access_mode(p, BRW_ALIGN_16);
+
+   generate_code();
+
+   return !failed;
+}
+
+void
+vec4_visitor::generate_code()
+{
+   int last_native_inst = p->nr_insn;
+   const char *last_annotation_string = NULL;
+   ir_instruction *last_annotation_ir = NULL;
+
+   int loop_stack_array_size = 16;
+   int loop_stack_depth = 0;
+   brw_instruction **loop_stack =
+      rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
+   int *if_depth_in_loop =
+      rzalloc_array(this->mem_ctx, int, loop_stack_array_size);
+
+
+   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+      printf("Native code for vertex shader %d:\n", prog->Name);
+   }
+
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+      struct brw_reg src[3], dst;
+
+      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+	 if (last_annotation_ir != inst->ir) {
+	    last_annotation_ir = inst->ir;
+	    if (last_annotation_ir) {
+	       printf("   ");
+	       last_annotation_ir->print();
+	       printf("\n");
+	    }
+	 }
+	 if (last_annotation_string != inst->annotation) {
+	    last_annotation_string = inst->annotation;
+	    if (last_annotation_string)
+	       printf("   %s\n", last_annotation_string);
+	 }
+      }
+
+      for (unsigned int i = 0; i < 3; i++) {
+	 src[i] = inst->get_src(i);
+      }
+      dst = inst->get_dst();
+
+      brw_set_conditionalmod(p, inst->conditional_mod);
+      brw_set_predicate_control(p, inst->predicate);
+      brw_set_predicate_inverse(p, inst->predicate_inverse);
+      brw_set_saturate(p, inst->saturate);
+
+      switch (inst->opcode) {
+      case BRW_OPCODE_MOV:
+	 brw_MOV(p, dst, src[0]);
+	 break;
+      case BRW_OPCODE_ADD:
+	 brw_ADD(p, dst, src[0], src[1]);
+	 break;
+      case BRW_OPCODE_MUL:
+	 brw_MUL(p, dst, src[0], src[1]);
+	 break;
+
+      case BRW_OPCODE_FRC:
+	 brw_FRC(p, dst, src[0]);
+	 break;
+      case BRW_OPCODE_RNDD:
+	 brw_RNDD(p, dst, src[0]);
+	 break;
+      case BRW_OPCODE_RNDE:
+	 brw_RNDE(p, dst, src[0]);
+	 break;
+      case BRW_OPCODE_RNDZ:
+	 brw_RNDZ(p, dst, src[0]);
+	 break;
+
+      case BRW_OPCODE_AND:
+	 brw_AND(p, dst, src[0], src[1]);
+	 break;
+      case BRW_OPCODE_OR:
+	 brw_OR(p, dst, src[0], src[1]);
+	 break;
+      case BRW_OPCODE_XOR:
+	 brw_XOR(p, dst, src[0], src[1]);
+	 break;
+      case BRW_OPCODE_NOT:
+	 brw_NOT(p, dst, src[0]);
+	 break;
+      case BRW_OPCODE_ASR:
+	 brw_ASR(p, dst, src[0], src[1]);
+	 break;
+      case BRW_OPCODE_SHR:
+	 brw_SHR(p, dst, src[0], src[1]);
+	 break;
+      case BRW_OPCODE_SHL:
+	 brw_SHL(p, dst, src[0], src[1]);
+	 break;
+
+      case BRW_OPCODE_CMP:
+	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
+	 break;
+      case BRW_OPCODE_SEL:
+	 brw_SEL(p, dst, src[0], src[1]);
+	 break;
+
+      case BRW_OPCODE_IF:
+	 if (inst->src[0].file != BAD_FILE) {
+	    /* The instruction has an embedded compare (only allowed on gen6) */
+	    assert(intel->gen == 6);
+	    gen6_IF(p, inst->conditional_mod, src[0], src[1]);
+	 } else {
+	    brw_IF(p, BRW_EXECUTE_8);
+	 }
+	 if_depth_in_loop[loop_stack_depth]++;
+	 break;
+
+      case BRW_OPCODE_ELSE:
+	 brw_ELSE(p);
+	 break;
+      case BRW_OPCODE_ENDIF:
+	 brw_ENDIF(p);
+	 if_depth_in_loop[loop_stack_depth]--;
+	 break;
+
+      case BRW_OPCODE_DO:
+	 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
+	 if (loop_stack_array_size <= loop_stack_depth) {
+	    loop_stack_array_size *= 2;
+	    loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
+				  loop_stack_array_size);
+	    if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
+				        loop_stack_array_size);
+	 }
+	 if_depth_in_loop[loop_stack_depth] = 0;
+	 break;
+
+      case BRW_OPCODE_BREAK:
+	 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
+	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+	 break;
+      case BRW_OPCODE_CONTINUE:
+	 /* FINISHME: We need to write the loop instruction support still. */
+	 if (intel->gen >= 6)
+	    gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
+	 else
+	    brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
+	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+	 break;
+
+      case BRW_OPCODE_WHILE: {
+	 struct brw_instruction *inst0, *inst1;
+	 GLuint br = 1;
+
+	 if (intel->gen >= 5)
+	    br = 2;
+
+	 assert(loop_stack_depth > 0);
+	 loop_stack_depth--;
+	 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
+	 if (intel->gen < 6) {
+	    /* patch all the BREAK/CONT instructions from last BGNLOOP */
+	    while (inst0 > loop_stack[loop_stack_depth]) {
+	       inst0--;
+	       if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+		   inst0->bits3.if_else.jump_count == 0) {
+		  inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+	    }
+	       else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+			inst0->bits3.if_else.jump_count == 0) {
+		  inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+	       }
+	    }
+	 }
+      }
+	 break;
+
+      default:
+	 generate_vs_instruction(inst, dst, src);
+	 break;
+      }
+
+      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+	 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
+	    if (0) {
+	       printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+		      ((uint32_t *)&p->store[i])[3],
+		      ((uint32_t *)&p->store[i])[2],
+		      ((uint32_t *)&p->store[i])[1],
+		      ((uint32_t *)&p->store[i])[0]);
+	    }
+	    brw_disasm(stdout, &p->store[i], intel->gen);
+	 }
+      }
+
+      last_native_inst = p->nr_insn;
+   }
+
+   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+      printf("\n");
+   }
+
+   ralloc_free(loop_stack);
+   ralloc_free(if_depth_in_loop);
+
+   brw_set_uip_jip(p);
+
+   /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
+    * emit issues, it doesn't get the jump distances into the output,
+    * which is often something we want to debug.  So this is here in
+    * case you're doing that.
+    */
+   if (0) {
+      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+	 for (unsigned int i = 0; i < p->nr_insn; i++) {
+	    printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+		   ((uint32_t *)&p->store[i])[3],
+		   ((uint32_t *)&p->store[i])[2],
+		   ((uint32_t *)&p->store[i])[1],
+		   ((uint32_t *)&p->store[i])[0]);
+	    brw_disasm(stdout, &p->store[i], intel->gen);
+	 }
+      }
+   }
+}
+
+extern "C" {
+
+bool
+brw_vs_emit(struct brw_vs_compile *c)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_context *brw = p->brw;
+   struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &intel->ctx;
+   struct gl_shader_program *prog = ctx->Shader.CurrentVertexProgram;
+
+   if (!prog)
+      return false;
+
+   struct brw_shader *shader =
+     (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
+   if (!shader)
+      return false;
+
+   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+      printf("GLSL IR for native vertex shader %d:\n", prog->Name);
+      _mesa_print_ir(shader->ir, NULL);
+      printf("\n\n");
+   }
+
+   vec4_visitor v(c, prog, shader);
+   if (!v.run()) {
+      /* FINISHME: Cleanly fail, test at link time, etc. */
+      assert(!"not reached");
+      return false;
+   }
+
+   return true;
+}
+
+} /* extern "C" */
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
new file mode 100644
index 00000000000..e7f6b28a536
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+#include "../glsl/ir_print_visitor.h"
+
+using namespace brw;
+
+namespace brw {
+
+static void
+assign(int *reg_hw_locations, reg *reg)
+{
+   if (reg->file == GRF) {
+      reg->reg = reg_hw_locations[reg->reg];
+   }
+}
+
+void
+vec4_visitor::reg_allocate_trivial()
+{
+   int last_grf = 0;
+   int hw_reg_mapping[this->virtual_grf_count];
+   int i;
+   int next;
+
+   /* Note that compressed instructions require alignment to 2 registers. */
+   hw_reg_mapping[0] = this->first_non_payload_grf;
+   next = hw_reg_mapping[0] + this->virtual_grf_sizes[0];
+   for (i = 1; i < this->virtual_grf_count; i++) {
+      hw_reg_mapping[i] = next;
+      next += this->virtual_grf_sizes[i];
+   }
+   prog_data->total_grf = next;
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)iter.get();
+
+      assign(hw_reg_mapping, &inst->dst);
+      assign(hw_reg_mapping, &inst->src[0]);
+      assign(hw_reg_mapping, &inst->src[1]);
+      assign(hw_reg_mapping, &inst->src[2]);
+   }
+
+   if (last_grf >= BRW_MAX_GRF) {
+      fail("Ran out of regs on trivial allocator (%d/%d)\n",
+	   last_grf, BRW_MAX_GRF);
+   }
+}
+
+void
+vec4_visitor::reg_allocate()
+{
+   reg_allocate_trivial();
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
new file mode 100644
index 00000000000..bba1d810f19
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -0,0 +1,1649 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+#include "main/macros.h"
+
+namespace brw {
+
+src_reg::src_reg(dst_reg reg)
+{
+   init();
+
+   this->file = reg.file;
+   this->reg = reg.reg;
+   this->reg_offset = reg.reg_offset;
+   this->type = reg.type;
+
+   int swizzles[4];
+   int next_chan = 0;
+   int last = 0;
+
+   for (int i = 0; i < 4; i++) {
+      if (!(reg.writemask & (1 << i)))
+	 continue;
+
+      swizzles[next_chan++] = last = i;
+   }
+
+   for (; next_chan < 4; next_chan++) {
+      swizzles[next_chan] = last;
+   }
+
+   this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
+				swizzles[2], swizzles[3]);
+}
+
+dst_reg::dst_reg(src_reg reg)
+{
+   init();
+
+   this->file = reg.file;
+   this->reg = reg.reg;
+   this->reg_offset = reg.reg_offset;
+   this->type = reg.type;
+   this->writemask = WRITEMASK_XYZW;
+}
+
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode, dst_reg dst,
+		   src_reg src0, src_reg src1, src_reg src2)
+{
+   vec4_instruction *inst = new(mem_ctx) vec4_instruction();
+
+   inst->opcode = opcode;
+   inst->dst = dst;
+   inst->src[0] = src0;
+   inst->src[1] = src1;
+   inst->src[2] = src2;
+   inst->ir = this->base_ir;
+   inst->annotation = this->current_annotation;
+
+   this->instructions.push_tail(inst);
+
+   return inst;
+}
+
+
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
+{
+   return emit(opcode, dst, src0, src1, src_reg());
+}
+
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
+{
+   assert(dst.writemask != 0);
+   return emit(opcode, dst, src0, src_reg(), src_reg());
+}
+
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode)
+{
+   return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg());
+}
+
+void
+vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
+{
+   static enum opcode dot_opcodes[] = {
+      BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
+   };
+
+   emit(dot_opcodes[elements - 2], dst, src0, src1);
+}
+
+void
+vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
+{
+   /* The gen6 math instruction ignores the source modifiers --
+    * swizzle, abs, negate, and at least some parts of the register
+    * region description.  Move the source to the corresponding slots
+    * of the destination generally work.
+    */
+   src_reg expanded = src_reg(this, glsl_type::float_type);
+   emit(BRW_OPCODE_MOV, dst, src);
+   src = expanded;
+
+   emit(opcode, dst, src);
+}
+
+void
+vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
+{
+   vec4_instruction *inst = emit(opcode, dst, src);
+   inst->base_mrf = 1;
+   inst->mlen = 1;
+}
+
+void
+vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
+{
+   switch (opcode) {
+   case SHADER_OPCODE_RCP:
+   case SHADER_OPCODE_RSQ:
+   case SHADER_OPCODE_SQRT:
+   case SHADER_OPCODE_EXP2:
+   case SHADER_OPCODE_LOG2:
+   case SHADER_OPCODE_SIN:
+   case SHADER_OPCODE_COS:
+      break;
+   default:
+      assert(!"not reached: bad math opcode");
+      return;
+   }
+
+   if (intel->gen >= 6) {
+      return emit_math1_gen6(opcode, dst, src);
+   } else {
+      return emit_math1_gen4(opcode, dst, src);
+   }
+}
+
+void
+vec4_visitor::emit_math2_gen6(enum opcode opcode,
+			      dst_reg dst, src_reg src0, src_reg src1)
+{
+   src_reg expanded;
+
+   /* The gen6 math instruction ignores the source modifiers --
+    * swizzle, abs, negate, and at least some parts of the register
+    * region description.  Move the sources to temporaries to make it
+    * generally work.
+    */
+
+   expanded = src_reg(this, glsl_type::vec4_type);
+   emit(BRW_OPCODE_MOV, dst, src0);
+   src0 = expanded;
+
+   expanded = src_reg(this, glsl_type::vec4_type);
+   emit(BRW_OPCODE_MOV, dst, src1);
+   src1 = expanded;
+
+   emit(opcode, dst, src0, src1);
+}
+
+void
+vec4_visitor::emit_math2_gen4(enum opcode opcode,
+			      dst_reg dst, src_reg src0, src_reg src1)
+{
+   vec4_instruction *inst = emit(opcode, dst, src0, src1);
+   inst->base_mrf = 1;
+   inst->mlen = 2;
+}
+
+void
+vec4_visitor::emit_math(enum opcode opcode,
+			dst_reg dst, src_reg src0, src_reg src1)
+{
+   assert(opcode == SHADER_OPCODE_POW);
+
+   if (intel->gen >= 6) {
+      return emit_math2_gen6(opcode, dst, src0, src1);
+   } else {
+      return emit_math2_gen4(opcode, dst, src0, src1);
+   }
+}
+
+void
+vec4_visitor::visit_instructions(const exec_list *list)
+{
+   foreach_iter(exec_list_iterator, iter, *list) {
+      ir_instruction *ir = (ir_instruction *)iter.get();
+
+      base_ir = ir;
+      ir->accept(this);
+   }
+}
+
+
+static int
+type_size(const struct glsl_type *type)
+{
+   unsigned int i;
+   int size;
+
+   switch (type->base_type) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_BOOL:
+      if (type->is_matrix()) {
+	 return type->matrix_columns;
+      } else {
+	 /* Regardless of size of vector, it gets a vec4. This is bad
+	  * packing for things like floats, but otherwise arrays become a
+	  * mess.  Hopefully a later pass over the code can pack scalars
+	  * down if appropriate.
+	  */
+	 return 1;
+      }
+   case GLSL_TYPE_ARRAY:
+      assert(type->length > 0);
+      return type_size(type->fields.array) * type->length;
+   case GLSL_TYPE_STRUCT:
+      size = 0;
+      for (i = 0; i < type->length; i++) {
+	 size += type_size(type->fields.structure[i].type);
+      }
+      return size;
+   case GLSL_TYPE_SAMPLER:
+      /* Samplers take up one slot in UNIFORMS[], but they're baked in
+       * at link time.
+       */
+      return 1;
+   default:
+      assert(0);
+      return 0;
+   }
+}
+
+int
+vec4_visitor::virtual_grf_alloc(int size)
+{
+   if (virtual_grf_array_size <= virtual_grf_count) {
+      if (virtual_grf_array_size == 0)
+	 virtual_grf_array_size = 16;
+      else
+	 virtual_grf_array_size *= 2;
+      virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
+				   virtual_grf_array_size);
+   }
+   virtual_grf_sizes[virtual_grf_count] = size;
+   return virtual_grf_count++;
+}
+
+src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
+{
+   init();
+
+   this->file = GRF;
+   this->reg = v->virtual_grf_alloc(type_size(type));
+
+   if (type->is_array() || type->is_record()) {
+      this->swizzle = BRW_SWIZZLE_NOOP;
+   } else {
+      this->swizzle = swizzle_for_size(type->vector_elements);
+   }
+
+   this->type = brw_type_for_base_type(type);
+}
+
+dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
+{
+   init();
+
+   this->file = GRF;
+   this->reg = v->virtual_grf_alloc(type_size(type));
+
+   if (type->is_array() || type->is_record()) {
+      this->writemask = WRITEMASK_XYZW;
+   } else {
+      this->writemask = (1 << type->vector_elements) - 1;
+   }
+
+   this->type = brw_type_for_base_type(type);
+}
+
+dst_reg *
+vec4_visitor::variable_storage(ir_variable *var)
+{
+   return (dst_reg *)hash_table_find(this->variable_ht, var);
+}
+
+void
+vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
+{
+   ir_expression *expr = ir->as_expression();
+
+   if (expr) {
+      src_reg op[2];
+      vec4_instruction *inst;
+
+      assert(expr->get_num_operands() <= 2);
+      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
+	 assert(expr->operands[i]->type->is_scalar());
+
+	 expr->operands[i]->accept(this);
+	 op[i] = this->result;
+      }
+
+      switch (expr->operation) {
+      case ir_unop_logic_not:
+	 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
+	 inst->conditional_mod = BRW_CONDITIONAL_Z;
+	 break;
+
+      case ir_binop_logic_xor:
+	 inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 break;
+
+      case ir_binop_logic_or:
+	 inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 break;
+
+      case ir_binop_logic_and:
+	 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 break;
+
+      case ir_unop_f2b:
+	 if (intel->gen >= 6) {
+	    inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
+	 } else {
+	    inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
+	 }
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 break;
+
+      case ir_unop_i2b:
+	 if (intel->gen >= 6) {
+	    inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
+	 } else {
+	    inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
+	 }
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 break;
+
+      case ir_binop_greater:
+      case ir_binop_gequal:
+      case ir_binop_less:
+      case ir_binop_lequal:
+      case ir_binop_equal:
+      case ir_binop_all_equal:
+      case ir_binop_nequal:
+      case ir_binop_any_nequal:
+	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
+	 inst->conditional_mod =
+	    brw_conditional_for_comparison(expr->operation);
+	 break;
+
+      default:
+	 assert(!"not reached");
+	 break;
+      }
+      return;
+   }
+
+   ir->accept(this);
+
+   if (intel->gen >= 6) {
+      vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
+			       this->result, src_reg(1));
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+   } else {
+      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+   }
+}
+
+/**
+ * Emit a gen6 IF statement with the comparison folded into the IF
+ * instruction.
+ */
+void
+vec4_visitor::emit_if_gen6(ir_if *ir)
+{
+   ir_expression *expr = ir->condition->as_expression();
+
+   if (expr) {
+      src_reg op[2];
+      vec4_instruction *inst;
+      dst_reg temp;
+
+      assert(expr->get_num_operands() <= 2);
+      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
+	 assert(expr->operands[i]->type->is_scalar());
+
+	 expr->operands[i]->accept(this);
+	 op[i] = this->result;
+      }
+
+      switch (expr->operation) {
+      case ir_unop_logic_not:
+	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
+	 inst->conditional_mod = BRW_CONDITIONAL_Z;
+	 return;
+
+      case ir_binop_logic_xor:
+	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 return;
+
+      case ir_binop_logic_or:
+	 temp = dst_reg(this, glsl_type::bool_type);
+	 emit(BRW_OPCODE_OR, temp, op[0], op[1]);
+	 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 return;
+
+      case ir_binop_logic_and:
+	 temp = dst_reg(this, glsl_type::bool_type);
+	 emit(BRW_OPCODE_AND, temp, op[0], op[1]);
+	 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 return;
+
+      case ir_unop_f2b:
+	 inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 return;
+
+      case ir_unop_i2b:
+	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 return;
+
+      case ir_binop_greater:
+      case ir_binop_gequal:
+      case ir_binop_less:
+      case ir_binop_lequal:
+      case ir_binop_equal:
+      case ir_binop_all_equal:
+      case ir_binop_nequal:
+      case ir_binop_any_nequal:
+	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
+	 inst->conditional_mod =
+	    brw_conditional_for_comparison(expr->operation);
+	 return;
+      default:
+	 assert(!"not reached");
+	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 return;
+      }
+      return;
+   }
+
+   ir->condition->accept(this);
+
+   vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(),
+			    this->result, src_reg(0));
+   inst->conditional_mod = BRW_CONDITIONAL_NZ;
+}
+
+void
+vec4_visitor::visit(ir_variable *ir)
+{
+   dst_reg *reg = NULL;
+
+   if (variable_storage(ir))
+      return;
+
+   switch (ir->mode) {
+   case ir_var_in:
+      reg = new(mem_ctx) dst_reg(ATTR, ir->location);
+      reg->type = brw_type_for_base_type(ir->type);
+      hash_table_insert(this->variable_ht, reg, ir);
+      break;
+
+   case ir_var_out:
+      reg = new(mem_ctx) dst_reg(this, ir->type);
+      hash_table_insert(this->variable_ht, reg, ir);
+
+      for (int i = 0; i < type_size(ir->type); i++) {
+	 output_reg[ir->location + i] = *reg;
+	 output_reg[ir->location + i].reg_offset = i;
+      }
+      break;
+
+   case ir_var_temporary:
+      reg = new(mem_ctx) dst_reg(this, ir->type);
+      hash_table_insert(this->variable_ht, reg, ir);
+
+      break;
+
+   case ir_var_uniform:
+      /* FINISHME: uniforms */
+      break;
+   }
+}
+
+void
+vec4_visitor::visit(ir_loop *ir)
+{
+   ir_dereference_variable *counter = NULL;
+
+   /* We don't want debugging output to print the whole body of the
+    * loop as the annotation.
+    */
+   this->base_ir = NULL;
+
+   if (ir->counter != NULL)
+      counter = new(ir) ir_dereference_variable(ir->counter);
+
+   if (ir->from != NULL) {
+      assert(ir->counter != NULL);
+
+      ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
+
+      a->accept(this);
+      delete a;
+   }
+
+   emit(BRW_OPCODE_DO);
+
+   if (ir->to) {
+      ir_expression *e =
+	 new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
+			       counter, ir->to);
+      ir_if *if_stmt =  new(ir) ir_if(e);
+
+      ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
+
+      if_stmt->then_instructions.push_tail(brk);
+
+      if_stmt->accept(this);
+
+      delete if_stmt;
+      delete e;
+      delete brk;
+   }
+
+   visit_instructions(&ir->body_instructions);
+
+   if (ir->increment) {
+      ir_expression *e =
+	 new(ir) ir_expression(ir_binop_add, counter->type,
+			       counter, ir->increment);
+
+      ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
+
+      a->accept(this);
+      delete a;
+      delete e;
+   }
+
+   emit(BRW_OPCODE_WHILE);
+}
+
+void
+vec4_visitor::visit(ir_loop_jump *ir)
+{
+   switch (ir->mode) {
+   case ir_loop_jump::jump_break:
+      emit(BRW_OPCODE_BREAK);
+      break;
+   case ir_loop_jump::jump_continue:
+      emit(BRW_OPCODE_CONTINUE);
+      break;
+   }
+}
+
+
+void
+vec4_visitor::visit(ir_function_signature *ir)
+{
+   assert(0);
+   (void)ir;
+}
+
+void
+vec4_visitor::visit(ir_function *ir)
+{
+   /* Ignore function bodies other than main() -- we shouldn't see calls to
+    * them since they should all be inlined.
+    */
+   if (strcmp(ir->name, "main") == 0) {
+      const ir_function_signature *sig;
+      exec_list empty;
+
+      sig = ir->matching_signature(&empty);
+
+      assert(sig);
+
+      visit_instructions(&sig->body);
+   }
+}
+
+GLboolean
+vec4_visitor::try_emit_sat(ir_expression *ir)
+{
+   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
+   if (!sat_src)
+      return false;
+
+   sat_src->accept(this);
+   src_reg src = this->result;
+
+   this->result = src_reg(this, ir->type);
+   vec4_instruction *inst;
+   inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src);
+   inst->saturate = true;
+
+   return true;
+}
+
+void
+vec4_visitor::emit_bool_comparison(unsigned int op,
+				 dst_reg dst, src_reg src0, src_reg src1)
+{
+   /* original gen4 does destination conversion before comparison. */
+   if (intel->gen < 5)
+      dst.type = src0.type;
+
+   vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1);
+   inst->conditional_mod = brw_conditional_for_comparison(op);
+
+   dst.type = BRW_REGISTER_TYPE_D;
+   emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1));
+}
+
+void
+vec4_visitor::visit(ir_expression *ir)
+{
+   unsigned int operand;
+   src_reg op[Elements(ir->operands)];
+   src_reg result_src;
+   dst_reg result_dst;
+   vec4_instruction *inst;
+
+   if (try_emit_sat(ir))
+      return;
+
+   for (operand = 0; operand < ir->get_num_operands(); operand++) {
+      this->result.file = BAD_FILE;
+      ir->operands[operand]->accept(this);
+      if (this->result.file == BAD_FILE) {
+	 printf("Failed to get tree for expression operand:\n");
+	 ir->operands[operand]->print();
+	 exit(1);
+      }
+      op[operand] = this->result;
+
+      /* Matrix expression operands should have been broken down to vector
+       * operations already.
+       */
+      assert(!ir->operands[operand]->type->is_matrix());
+   }
+
+   int vector_elements = ir->operands[0]->type->vector_elements;
+   if (ir->operands[1]) {
+      vector_elements = MAX2(vector_elements,
+			     ir->operands[1]->type->vector_elements);
+   }
+
+   this->result.file = BAD_FILE;
+
+   /* Storage for our result.  Ideally for an assignment we'd be using
+    * the actual storage for the result here, instead.
+    */
+   result_src = src_reg(this, ir->type);
+   /* convenience for the emit functions below. */
+   result_dst = dst_reg(result_src);
+   /* If nothing special happens, this is the result. */
+   this->result = result_src;
+   /* Limit writes to the channels that will be used by result_src later.
+    * This does limit this temp's use as a temporary for multi-instruction
+    * sequences.
+    */
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+
+   switch (ir->operation) {
+   case ir_unop_logic_not:
+      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
+       * ones complement of the whole register, not just bit 0.
+       */
+      emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1));
+      break;
+   case ir_unop_neg:
+      op[0].negate = !op[0].negate;
+      this->result = op[0];
+      break;
+   case ir_unop_abs:
+      op[0].abs = true;
+      op[0].negate = false;
+      this->result = op[0];
+      break;
+
+   case ir_unop_sign:
+      emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f));
+
+      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
+      inst->conditional_mod = BRW_CONDITIONAL_G;
+      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f));
+      inst->predicate = BRW_PREDICATE_NORMAL;
+
+      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
+      inst->conditional_mod = BRW_CONDITIONAL_L;
+      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f));
+      inst->predicate = BRW_PREDICATE_NORMAL;
+
+      break;
+
+   case ir_unop_rcp:
+      emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
+      break;
+
+   case ir_unop_exp2:
+      emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
+      break;
+   case ir_unop_log2:
+      emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
+      break;
+   case ir_unop_exp:
+   case ir_unop_log:
+      assert(!"not reached: should be handled by ir_explog_to_explog2");
+      break;
+   case ir_unop_sin:
+   case ir_unop_sin_reduced:
+      emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
+      break;
+   case ir_unop_cos:
+   case ir_unop_cos_reduced:
+      emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
+      break;
+
+   case ir_unop_dFdx:
+   case ir_unop_dFdy:
+      assert(!"derivatives not valid in vertex shader");
+      break;
+
+   case ir_unop_noise:
+      assert(!"not reached: should be handled by lower_noise");
+      break;
+
+   case ir_binop_add:
+      emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_sub:
+      assert(!"not reached: should be handled by ir_sub_to_add_neg");
+      break;
+
+   case ir_binop_mul:
+      emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_div:
+      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
+   case ir_binop_mod:
+      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
+      break;
+
+   case ir_binop_less:
+   case ir_binop_greater:
+   case ir_binop_lequal:
+   case ir_binop_gequal:
+   case ir_binop_equal:
+   case ir_binop_nequal: {
+      dst_reg temp = result_dst;
+      /* original gen4 does implicit conversion before comparison. */
+      if (intel->gen < 5)
+	 temp.type = op[0].type;
+
+      inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
+      inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
+      emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1));
+      break;
+   }
+
+   case ir_binop_all_equal:
+      /* "==" operator producing a scalar boolean. */
+      if (ir->operands[0]->type->is_vector() ||
+	  ir->operands[1]->type->is_vector()) {
+	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_Z;
+
+	 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
+	 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
+	 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
+      } else {
+	 dst_reg temp = result_dst;
+	 /* original gen4 does implicit conversion before comparison. */
+	 if (intel->gen < 5)
+	    temp.type = op[0].type;
+
+	 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
+      }
+      break;
+   case ir_binop_any_nequal:
+      /* "!=" operator producing a scalar boolean. */
+      if (ir->operands[0]->type->is_vector() ||
+	  ir->operands[1]->type->is_vector()) {
+	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+	 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
+	 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
+	 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+      } else {
+	 dst_reg temp = result_dst;
+	 /* original gen4 does implicit conversion before comparison. */
+	 if (intel->gen < 5)
+	    temp.type = op[0].type;
+
+	 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
+      }
+      break;
+
+   case ir_unop_any:
+      emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
+      emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
+
+      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
+      inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+      break;
+
+   case ir_binop_logic_xor:
+      emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_logic_or:
+      emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_logic_and:
+      emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_dot:
+      assert(ir->operands[0]->type->is_vector());
+      assert(ir->operands[0]->type == ir->operands[1]->type);
+      emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
+      break;
+
+   case ir_unop_sqrt:
+      emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
+      break;
+   case ir_unop_rsq:
+      emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
+      break;
+   case ir_unop_i2f:
+   case ir_unop_i2u:
+   case ir_unop_u2i:
+   case ir_unop_u2f:
+   case ir_unop_b2f:
+   case ir_unop_b2i:
+   case ir_unop_f2i:
+      emit(BRW_OPCODE_MOV, result_dst, op[0]);
+      break;
+   case ir_unop_f2b:
+   case ir_unop_i2b: {
+      dst_reg temp = result_dst;
+      /* original gen4 does implicit conversion before comparison. */
+      if (intel->gen < 5)
+	 temp.type = op[0].type;
+
+      inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f));
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+      inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1));
+      break;
+   }
+
+   case ir_unop_trunc:
+      emit(BRW_OPCODE_RNDZ, result_dst, op[0]);
+      break;
+   case ir_unop_ceil:
+      op[0].negate = !op[0].negate;
+      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
+      this->result.negate = true;
+      break;
+   case ir_unop_floor:
+      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
+      break;
+   case ir_unop_fract:
+      inst = emit(BRW_OPCODE_FRC, result_dst, op[0]);
+      break;
+   case ir_unop_round_even:
+      emit(BRW_OPCODE_RNDE, result_dst, op[0]);
+      break;
+
+   case ir_binop_min:
+      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
+      inst->conditional_mod = BRW_CONDITIONAL_L;
+
+      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
+      inst->predicate = BRW_PREDICATE_NORMAL;
+      break;
+   case ir_binop_max:
+      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
+      inst->conditional_mod = BRW_CONDITIONAL_G;
+
+      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
+      inst->predicate = BRW_PREDICATE_NORMAL;
+      break;
+
+   case ir_binop_pow:
+      emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
+      break;
+
+   case ir_unop_bit_not:
+      inst = emit(BRW_OPCODE_NOT, result_dst, op[0]);
+      break;
+   case ir_binop_bit_and:
+      inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_bit_xor:
+      inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_bit_or:
+      inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_lshift:
+   case ir_binop_rshift:
+      assert(!"GLSL 1.30 features unsupported");
+      break;
+
+   case ir_quadop_vector:
+      assert(!"not reached: should be handled by lower_quadop_vector");
+      break;
+   }
+}
+
+
+void
+vec4_visitor::visit(ir_swizzle *ir)
+{
+   src_reg src;
+   int i = 0;
+   int swizzle[4];
+
+   /* Note that this is only swizzles in expressions, not those on the left
+    * hand side of an assignment, which do write masking.  See ir_assignment
+    * for that.
+    */
+
+   ir->val->accept(this);
+   src = this->result;
+   assert(src.file != BAD_FILE);
+
+   if (i < ir->type->vector_elements) {
+      switch (i) {
+      case 0:
+	 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
+	 break;
+      case 1:
+	 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
+	 break;
+      case 2:
+	 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
+	 break;
+      case 3:
+	 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
+	    break;
+      }
+   }
+   for (; i < 4; i++) {
+      /* Replicate the last channel out. */
+      swizzle[i] = swizzle[ir->type->vector_elements - 1];
+   }
+
+   src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+
+   this->result = src;
+}
+
+void
+vec4_visitor::visit(ir_dereference_variable *ir)
+{
+   dst_reg *reg = variable_storage(ir->var);
+
+   if (!reg) {
+      fail("Failed to find variable storage for %s\n", ir->var->name);
+      this->result = src_reg(brw_null_reg());
+      return;
+   }
+
+   this->result = src_reg(*reg);
+}
+
+void
+vec4_visitor::visit(ir_dereference_array *ir)
+{
+   ir_constant *constant_index;
+   src_reg src;
+   int element_size = type_size(ir->type);
+
+   constant_index = ir->array_index->constant_expression_value();
+
+   ir->array->accept(this);
+   src = this->result;
+
+   if (constant_index) {
+      src.reg_offset += constant_index->value.i[0] * element_size;
+   } else {
+#if 0 /* Variable array index */
+      /* Variable index array dereference.  It eats the "vec4" of the
+       * base of the array and an index that offsets the Mesa register
+       * index.
+       */
+      ir->array_index->accept(this);
+
+      src_reg index_reg;
+
+      if (element_size == 1) {
+	 index_reg = this->result;
+      } else {
+	 index_reg = src_reg(this, glsl_type::float_type);
+
+	 emit(BRW_OPCODE_MUL, dst_reg(index_reg),
+	      this->result, src_reg_for_float(element_size));
+      }
+
+      src.reladdr = ralloc(mem_ctx, src_reg);
+      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
+#endif
+   }
+
+   /* If the type is smaller than a vec4, replicate the last channel out. */
+   if (ir->type->is_scalar() || ir->type->is_vector())
+      src.swizzle = swizzle_for_size(ir->type->vector_elements);
+   else
+      src.swizzle = BRW_SWIZZLE_NOOP;
+
+   this->result = src;
+}
+
+void
+vec4_visitor::visit(ir_dereference_record *ir)
+{
+   unsigned int i;
+   const glsl_type *struct_type = ir->record->type;
+   int offset = 0;
+
+   ir->record->accept(this);
+
+   for (i = 0; i < struct_type->length; i++) {
+      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
+	 break;
+      offset += type_size(struct_type->fields.structure[i].type);
+   }
+
+   /* If the type is smaller than a vec4, replicate the last channel out. */
+   if (ir->type->is_scalar() || ir->type->is_vector())
+      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
+   else
+      this->result.swizzle = BRW_SWIZZLE_NOOP;
+
+   this->result.reg_offset += offset;
+}
+
+/**
+ * We want to be careful in assignment setup to hit the actual storage
+ * instead of potentially using a temporary like we might with the
+ * ir_dereference handler.
+ */
+static dst_reg
+get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
+{
+   /* The LHS must be a dereference.  If the LHS is a variable indexed array
+    * access of a vector, it must be separated into a series conditional moves
+    * before reaching this point (see ir_vec_index_to_cond_assign).
+    */
+   assert(ir->as_dereference());
+   ir_dereference_array *deref_array = ir->as_dereference_array();
+   if (deref_array) {
+      assert(!deref_array->array->type->is_vector());
+   }
+
+   /* Use the rvalue deref handler for the most part.  We'll ignore
+    * swizzles in it and write swizzles using writemask, though.
+    */
+   ir->accept(v);
+   return dst_reg(v->result);
+}
+
+void
+vec4_visitor::emit_block_move(ir_assignment *ir)
+{
+   ir->rhs->accept(this);
+   src_reg src = this->result;
+
+   dst_reg dst = get_assignment_lhs(ir->lhs, this);
+
+   /* FINISHME: This should really set to the correct maximal writemask for each
+    * FINISHME: component written (in the loops below).
+    */
+   dst.writemask = WRITEMASK_XYZW;
+
+   for (int i = 0; i < type_size(ir->lhs->type); i++) {
+      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);
+      if (ir->condition)
+	 inst->predicate = BRW_PREDICATE_NORMAL;
+
+      dst.reg_offset++;
+      src.reg_offset++;
+   }
+}
+
+void
+vec4_visitor::visit(ir_assignment *ir)
+{
+   if (!ir->lhs->type->is_scalar() &&
+       !ir->lhs->type->is_vector()) {
+      emit_block_move(ir);
+      return;
+   }
+
+   /* Now we're down to just a scalar/vector with writemasks. */
+   int i;
+
+   ir->rhs->accept(this);
+   src_reg src = this->result;
+
+   dst_reg dst = get_assignment_lhs(ir->lhs, this);
+
+   int swizzles[4];
+   int first_enabled_chan = 0;
+   int src_chan = 0;
+
+   assert(ir->lhs->type->is_vector());
+   dst.writemask = ir->write_mask;
+
+   for (int i = 0; i < 4; i++) {
+      if (dst.writemask & (1 << i)) {
+	 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
+	 break;
+      }
+   }
+
+   /* Swizzle a small RHS vector into the channels being written.
+    *
+    * glsl ir treats write_mask as dictating how many channels are
+    * present on the RHS while in our instructions we need to make
+    * those channels appear in the slots of the vec4 they're written to.
+    */
+   for (int i = 0; i < 4; i++) {
+      if (dst.writemask & (1 << i))
+	 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
+      else
+	 swizzles[i] = first_enabled_chan;
+   }
+   src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
+			      swizzles[2], swizzles[3]);
+
+   if (ir->condition) {
+      emit_bool_to_cond_code(ir->condition);
+   }
+
+   for (i = 0; i < type_size(ir->lhs->type); i++) {
+      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);
+
+      if (ir->condition)
+	 inst->predicate = BRW_PREDICATE_NORMAL;
+
+      dst.reg_offset++;
+      src.reg_offset++;
+   }
+}
+
+
+void
+vec4_visitor::visit(ir_constant *ir)
+{
+   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
+      src_reg temp_base = src_reg(this, ir->type);
+      dst_reg temp = dst_reg(temp_base);
+
+      foreach_iter(exec_list_iterator, iter, ir->components) {
+	 ir_constant *field_value = (ir_constant *)iter.get();
+	 int size = type_size(field_value->type);
+
+	 assert(size > 0);
+
+	 field_value->accept(this);
+	 src_reg src = this->result;
+
+	 for (int i = 0; i < (unsigned int)size; i++) {
+	    emit(BRW_OPCODE_MOV, temp, src);
+
+	    src.reg_offset++;
+	    temp.reg_offset++;
+	 }
+      }
+      this->result = temp_base;
+      return;
+   }
+
+   if (ir->type->is_array()) {
+      src_reg temp_base = src_reg(this, ir->type);
+      dst_reg temp = dst_reg(temp_base);
+      int size = type_size(ir->type->fields.array);
+
+      assert(size > 0);
+
+      for (unsigned int i = 0; i < ir->type->length; i++) {
+	 ir->array_elements[i]->accept(this);
+	 src_reg src = this->result;
+	 for (int j = 0; j < size; j++) {
+	    emit(BRW_OPCODE_MOV, temp, src);
+
+	    src.reg_offset++;
+	    temp.reg_offset++;
+	 }
+      }
+      this->result = temp_base;
+      return;
+   }
+
+   if (ir->type->is_matrix()) {
+      this->result = src_reg(this, ir->type);
+      dst_reg dst = dst_reg(this->result);
+
+      assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+
+      for (int i = 0; i < ir->type->matrix_columns; i++) {
+	 for (int j = 0; j < ir->type->vector_elements; j++) {
+	    dst.writemask = 1 << j;
+	    emit(BRW_OPCODE_MOV, dst,
+		 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
+	 }
+	 dst.reg_offset++;
+      }
+      return;
+   }
+
+   for (int i = 0; i < ir->type->vector_elements; i++) {
+      this->result = src_reg(this, ir->type);
+      dst_reg dst = dst_reg(this->result);
+
+      dst.writemask = 1 << i;
+
+      switch (ir->type->base_type) {
+      case GLSL_TYPE_FLOAT:
+	 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.f[i]));
+	 break;
+      case GLSL_TYPE_INT:
+	 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.i[i]));
+	 break;
+      case GLSL_TYPE_UINT:
+	 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.u[i]));
+	 break;
+      case GLSL_TYPE_BOOL:
+	 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.b[i]));
+	 break;
+      default:
+	 assert(!"Non-float/uint/int/bool constant");
+	 break;
+      }
+   }
+}
+
+void
+vec4_visitor::visit(ir_call *ir)
+{
+   assert(!"not reached");
+}
+
+void
+vec4_visitor::visit(ir_texture *ir)
+{
+   assert(!"not reached");
+}
+
+void
+vec4_visitor::visit(ir_return *ir)
+{
+   assert(!"not reached");
+}
+
+void
+vec4_visitor::visit(ir_discard *ir)
+{
+   assert(!"not reached");
+}
+
+void
+vec4_visitor::visit(ir_if *ir)
+{
+   this->base_ir = ir->condition;
+   ir->condition->accept(this);
+   assert(this->result.file != BAD_FILE);
+
+   /* FINISHME: condcode */
+   emit(BRW_OPCODE_IF);
+
+   visit_instructions(&ir->then_instructions);
+
+   if (!ir->else_instructions.is_empty()) {
+      this->base_ir = ir->condition;
+      emit(BRW_OPCODE_ELSE);
+
+      visit_instructions(&ir->else_instructions);
+   }
+
+   this->base_ir = ir->condition;
+   emit(BRW_OPCODE_ENDIF);
+}
+
+int
+vec4_visitor::emit_vue_header_gen4(int header_mrf)
+{
+   /* Get the position */
+   src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
+
+   /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
+   dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
+
+   current_annotation = "NDC";
+   dst_reg ndc_w = ndc;
+   ndc_w.writemask = WRITEMASK_W;
+   src_reg pos_w = pos;
+   pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
+   emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
+
+   dst_reg ndc_xyz = ndc;
+   ndc_xyz.writemask = WRITEMASK_XYZ;
+
+   emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w));
+
+   if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
+       c->key.nr_userclip || brw->has_negative_rhw_bug) {
+      dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
+      GLuint i;
+
+      emit(BRW_OPCODE_MOV, header1, 0u);
+
+      if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
+	 assert(!"finishme: psiz");
+	 src_reg psiz;
+
+	 header1.writemask = WRITEMASK_W;
+	 emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11);
+	 emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8);
+      }
+
+      for (i = 0; i < c->key.nr_userclip; i++) {
+	 vec4_instruction *inst;
+
+	 inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()),
+		     pos, src_reg(c->userplane[i]));
+	 inst->conditional_mod = BRW_CONDITIONAL_L;
+
+	 emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i);
+	 inst->predicate = BRW_PREDICATE_NORMAL;
+      }
+
+      /* i965 clipping workaround:
+       * 1) Test for -ve rhw
+       * 2) If set,
+       *      set ndc = (0,0,0,0)
+       *      set ucp[6] = 1
+       *
+       * Later, clipping will detect ucp[6] and ensure the primitive is
+       * clipped against all fixed planes.
+       */
+      if (brw->has_negative_rhw_bug) {
+#if 0
+	 /* FINISHME */
+	 brw_CMP(p,
+		 vec8(brw_null_reg()),
+		 BRW_CONDITIONAL_L,
+		 brw_swizzle1(ndc, 3),
+		 brw_imm_f(0));
+
+	 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
+	 brw_MOV(p, ndc, brw_imm_f(0));
+	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+#endif
+      }
+
+      header1.writemask = WRITEMASK_XYZW;
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1));
+   } else {
+      emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++),
+				  BRW_REGISTER_TYPE_UD), 0u);
+   }
+
+   if (intel->gen == 5) {
+      /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
+       * dword 0-3 (m1) of the header is indices, point width, clip flags.
+       * dword 4-7 (m2) is the ndc position (set above)
+       * dword 8-11 (m3) of the vertex header is the 4D space position
+       * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
+       * m6 is a pad so that the vertex element data is aligned
+       * m7 is the first vertex data we fill, which is the vertex position.
+       */
+      current_annotation = "NDC";
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
+
+      current_annotation = "gl_Position";
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
+
+      /* user clip distance. */
+      header_mrf += 2;
+
+      /* Pad so that vertex element data (starts with position) is aligned. */
+      header_mrf++;
+   } else {
+      /* There are 8 dwords in VUE header pre-Ironlake:
+       * dword 0-3 (m1) is indices, point width, clip flags.
+       * dword 4-7 (m2) is ndc position (set above)
+       *
+       * dword 8-11 (m3) is the first vertex data, which we always have be the
+       * vertex position.
+       */
+      current_annotation = "NDC";
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
+
+      current_annotation = "gl_Position";
+      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
+   }
+
+   return header_mrf;
+}
+
+int
+vec4_visitor::emit_vue_header_gen6(int header_mrf)
+{
+   struct brw_reg reg;
+
+   /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
+    * dword 0-3 (m2) of the header is indices, point width, clip flags.
+    * dword 4-7 (m3) is the 4D space position
+    * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
+    * enabled.
+    *
+    * m4 or 6 is the first vertex element data we fill, which is
+    * the vertex position.
+    */
+
+   current_annotation = "indices, point width, clip flags";
+   reg = brw_message_reg(header_mrf++);
+   emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0));
+   if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
+      emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W),
+	   src_reg(output_reg[VERT_RESULT_PSIZ]));
+   }
+
+   current_annotation = "gl_Position";
+   emit(BRW_OPCODE_MOV,
+	brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS]));
+
+   current_annotation = "user clip distances";
+   if (c->key.nr_userclip) {
+      for (int i = 0; i < c->key.nr_userclip; i++) {
+	 struct brw_reg m;
+	 if (i < 4)
+	    m = brw_message_reg(header_mrf);
+	 else
+	    m = brw_message_reg(header_mrf + 1);
+
+	 emit(BRW_OPCODE_DP4,
+	      dst_reg(brw_writemask(m, 1 << (i & 7))),
+	      src_reg(c->userplane[i]));
+      }
+      header_mrf += 2;
+   }
+
+   current_annotation = NULL;
+
+   return header_mrf;
+}
+
+static int
+align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
+{
+   struct intel_context *intel = &brw->intel;
+
+   if (intel->gen >= 6) {
+      /* URB data written (does not include the message header reg) must
+       * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
+       * section 5.4.3.2.2: URB_INTERLEAVED.
+       *
+       * URB entries are allocated on a multiple of 1024 bits, so an
+       * extra 128 bits written here to make the end align to 256 is
+       * no problem.
+       */
+      if ((mlen % 2) != 1)
+	 mlen++;
+   }
+
+   return mlen;
+}
+
+/**
+ * Generates the VUE payload plus the 1 or 2 URB write instructions to
+ * complete the VS thread.
+ *
+ * The VUE layout is documented in Volume 2a.
+ */
+void
+vec4_visitor::emit_urb_writes()
+{
+   int base_mrf = 1;
+   int mrf = base_mrf;
+   int urb_entry_size;
+
+   /* FINISHME: edgeflag */
+
+   /* First mrf is the g0-based message header containing URB handles and such,
+    * which is implied in VS_OPCODE_URB_WRITE.
+    */
+   mrf++;
+
+   if (intel->gen >= 6) {
+      mrf = emit_vue_header_gen6(mrf);
+   } else {
+      mrf = emit_vue_header_gen4(mrf);
+   }
+
+   int attr;
+   for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
+      if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
+	 continue;
+
+      /* This is loaded into the VUE header, and thus doesn't occupy
+       * an attribute slot.
+       */
+      if (attr == VERT_RESULT_PSIZ)
+	 continue;
+
+      emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
+
+      /* If this is MRF 15, we can't fit anything more into this URB
+       * WRITE.  Note that base_mrf of 1 means that MRF 15 is an
+       * even-numbered amount of URB write data, which will meet
+       * gen6's requirements for length alignment.
+       */
+      if (mrf == 15)
+	 break;
+   }
+
+   vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
+   inst->base_mrf = base_mrf;
+   inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
+   inst->eot = true;
+
+   urb_entry_size = mrf - base_mrf;
+
+   for (; attr < VERT_RESULT_MAX; attr++) {
+      if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
+	 continue;
+      fail("Second URB write not supported.\n");
+      break;
+   }
+
+   if (intel->gen == 6)
+      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
+   else
+      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
+}
+
+vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
+			   struct gl_shader_program *prog,
+			   struct brw_shader *shader)
+{
+   this->c = c;
+   this->p = &c->func;
+   this->brw = p->brw;
+   this->intel = &brw->intel;
+   this->ctx = &intel->ctx;
+   this->prog = prog;
+   this->shader = shader;
+
+   this->mem_ctx = ralloc_context(NULL);
+   this->failed = false;
+
+   this->base_ir = NULL;
+   this->current_annotation = NULL;
+
+   this->c = c;
+   this->prog_data = &c->prog_data;
+
+   this->variable_ht = hash_table_ctor(0,
+				       hash_table_pointer_hash,
+				       hash_table_pointer_compare);
+
+   this->virtual_grf_sizes = NULL;
+   this->virtual_grf_count = 0;
+   this->virtual_grf_array_size = 0;
+}
+
+vec4_visitor::~vec4_visitor()
+{
+   hash_table_dtor(this->variable_ht);
+}
+
+
+void
+vec4_visitor::fail(const char *format, ...)
+{
+   va_list va;
+   char *msg;
+
+   if (failed)
+      return;
+
+   failed = true;
+
+   va_start(va, format);
+   msg = ralloc_vasprintf(mem_ctx, format, va);
+   va_end(va);
+   msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
+
+   this->fail_msg = msg;
+
+   if (INTEL_DEBUG & DEBUG_VS) {
+      fprintf(stderr, "%s",  msg);
+   }
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index a9ad5311fe3..bd0677db151 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -30,6 +30,7 @@
   */
            
 
+#include "main/compiler.h"
 #include "brw_context.h"
 #include "brw_vs.h"
 #include "brw_util.h"
@@ -50,6 +51,7 @@ static void do_vs_prog( struct brw_context *brw,
    void *mem_ctx;
    int aux_size;
    int i;
+   static int new_vs = -1;
 
    memset(&c, 0, sizeof(c));
    memcpy(&c.key, key, sizeof(*key));
@@ -85,7 +87,15 @@ static void do_vs_prog( struct brw_context *brw,
 
    /* Emit GEN4 code.
     */
-   brw_vs_emit(&c);
+   if (new_vs == -1)
+      new_vs = getenv("INTEL_NEW_VS") != NULL;
+
+   if (new_vs) {
+      if (!brw_vs_emit(&c))
+	 brw_old_vs_emit(&c);
+   } else {
+      brw_old_vs_emit(&c);
+   }
 
    /* get the program
     */
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 432994a8534..9f9fed33970 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -92,6 +92,7 @@ struct brw_vs_compile {
    GLboolean needs_stack;
 };
 
-void brw_vs_emit( struct brw_vs_compile *c );
+bool brw_vs_emit(struct brw_vs_compile *c);
+void brw_old_vs_emit(struct brw_vs_compile *c);
 
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index dbabb44e45c..a06a2bbec52 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1903,7 +1903,7 @@ brw_vs_rescale_gl_fixed(struct brw_vs_compile *c)
 
 /* Emit the vertex program instructions here.
  */
-void brw_vs_emit(struct brw_vs_compile *c )
+void brw_old_vs_emit(struct brw_vs_compile *c )
 {
 #define MAX_IF_DEPTH 32
 #define MAX_LOOP_DEPTH 32

From a070d5f363e99b0f846d555e9ca3a74ec807fdc0 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 4 May 2011 12:50:16 -0700
Subject: [PATCH 357/600] i965/vs: Start adding support for uniforms

There's no clever packing here, no pull constants, and no array support.
---
 src/mesa/drivers/dri/i965/brw_context.h       |  22 ++-
 src/mesa/drivers/dri/i965/brw_curbe.c         |  27 ++--
 src/mesa/drivers/dri/i965/brw_vec4.h          |   5 +
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp   |  75 ++++++---
 .../drivers/dri/i965/brw_vec4_visitor.cpp     | 150 +++++++++++++++++-
 .../drivers/dri/i965/brw_wm_surface_state.c   |   2 +-
 src/mesa/drivers/dri/i965/gen6_vs_state.c     |  21 ++-
 src/mesa/drivers/dri/i965/gen6_wm_state.c     |   2 +-
 src/mesa/drivers/dri/i965/gen7_wm_state.c     |   2 +-
 9 files changed, 257 insertions(+), 49 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 7b6b64c1a5c..4a1abd6252e 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -248,6 +248,7 @@ enum param_conversion {
    PARAM_CONVERT_F2I,
    PARAM_CONVERT_F2U,
    PARAM_CONVERT_F2B,
+   PARAM_CONVERT_ZERO,
 };
 
 /* Data about a particular attempt to compile a program.  Note that
@@ -317,6 +318,13 @@ struct brw_vs_prog_data {
    /* Used for calculating urb partitions:
     */
    GLuint urb_entry_size;
+
+   const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */
+   enum param_conversion param_convert[MAX_UNIFORMS * 4];
+   const float *pull_param[MAX_UNIFORMS * 4];
+   enum param_conversion pull_param_convert[MAX_UNIFORMS * 4];
+
+   bool uses_new_param_layout;
 };
 
 
@@ -898,7 +906,7 @@ brw_fragment_program_const(const struct gl_fragment_program *p)
 }
 
 static inline
-float convert_param(enum param_conversion conversion, float param)
+float convert_param(enum param_conversion conversion, const float *param)
 {
    union {
       float f;
@@ -908,21 +916,23 @@ float convert_param(enum param_conversion conversion, float param)
 
    switch (conversion) {
    case PARAM_NO_CONVERT:
-      return param;
+      return *param;
    case PARAM_CONVERT_F2I:
-      fi.i = param;
+      fi.i = *param;
       return fi.f;
    case PARAM_CONVERT_F2U:
-      fi.u = param;
+      fi.u = *param;
       return fi.f;
    case PARAM_CONVERT_F2B:
-      if (param != 0.0)
+      if (*param != 0.0)
 	 fi.i = 1;
       else
 	 fi.i = 0;
       return fi.f;
+   case PARAM_CONVERT_ZERO:
+      return 0.0;
    default:
-      return param;
+      return *param;
    }
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index ae11c487a2c..960be10006e 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -203,7 +203,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
       /* copy float constants */
       for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
 	 buf[offset + i] = convert_param(brw->wm.prog_data->param_convert[i],
-					 *brw->wm.prog_data->param[i]);
+					 brw->wm.prog_data->param[i]);
       }
    }
 
@@ -244,15 +244,22 @@ static void prepare_constant_buffer(struct brw_context *brw)
       GLuint offset = brw->curbe.vs_start * 16;
       GLuint nr = brw->vs.prog_data->nr_params / 4;
 
-      /* Load the subset of push constants that will get used when
-       * we also have a pull constant buffer.
-       */
-      for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
-	 if (brw->vs.constant_map[i] != -1) {
-	    assert(brw->vs.constant_map[i] <= nr);
-	    memcpy(buf + offset + brw->vs.constant_map[i] * 4,
-		   vp->program.Base.Parameters->ParameterValues[i],
-		   4 * sizeof(float));
+      if (brw->vs.prog_data->uses_new_param_layout) {
+	 for (i = 0; i < brw->vs.prog_data->nr_params; i++) {
+	    buf[offset + i] = convert_param(brw->vs.prog_data->param_convert[i],
+					    brw->vs.prog_data->param[i]);
+	 }
+      } else {
+	 /* Load the subset of push constants that will get used when
+	  * we also have a pull constant buffer.
+	  */
+	 for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+	    if (brw->vs.constant_map[i] != -1) {
+	       assert(brw->vs.constant_map[i] <= nr);
+	       memcpy(buf + offset + brw->vs.constant_map[i] * 4,
+		      vp->program.Base.Parameters->ParameterValues[i],
+		      4 * sizeof(float));
+	    }
 	 }
       }
    }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 10168fc1cb0..01058243f04 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -356,6 +356,8 @@ public:
     * for the ir->location's used.
     */
    dst_reg output_reg[VERT_RESULT_MAX];
+   int uniform_size[MAX_UNIFORMS];
+   int uniforms;
 
    struct hash_table *variable_ht;
 
@@ -363,7 +365,10 @@ public:
    void fail(const char *msg, ...);
 
    int virtual_grf_alloc(int size);
+   int setup_uniform_values(int loc, const glsl_type *type);
+   void setup_builtin_uniform_values(ir_variable *ir);
    int setup_attributes(int payload_reg);
+   int setup_uniforms(int payload_reg);
    void setup_payload();
    void reg_allocate_trivial();
    void reg_allocate();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index bdc7a79d83d..1f2853e1118 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -67,20 +67,12 @@ vec4_visitor::setup_attributes(int payload_reg)
 
    prog_data->urb_read_length = (nr_attributes + 1) / 2;
 
-   return nr_attributes;
+   return payload_reg + nr_attributes;
 }
 
-void
-vec4_visitor::setup_payload(void)
+int
+vec4_visitor::setup_uniforms(int reg)
 {
-   int reg = 0;
-
-   /* r0 is always reserved, as it contains the payload with the URB
-    * handles that are passed on to the URB write at the end of the
-    * thread.
-    */
-   reg++;
-
    /* User clip planes from curbe:
     */
    if (c->key.nr_userclip) {
@@ -99,14 +91,49 @@ vec4_visitor::setup_payload(void)
       }
    }
 
-   /* FINISHME: push constants */
-   c->prog_data.curb_read_length = reg - 1;
-   c->prog_data.nr_params = 0;
-   /* XXX 0 causes a bug elsewhere... */
-   if (intel->gen < 6 && c->prog_data.nr_params == 0)
-      c->prog_data.nr_params = 4;
+   /* The pre-gen6 VS requires that some push constants get loaded no
+    * matter what, or the GPU would hang.
+    */
+   if (this->uniforms == 0) {
+      this->uniform_size[this->uniforms] = 1;
 
-   reg += setup_attributes(reg);
+      for (unsigned int i = 0; i < 4; i++) {
+	 unsigned int slot = this->uniforms * 4 + i;
+
+	 c->prog_data.param[slot] = NULL;
+	 c->prog_data.param_convert[slot] = PARAM_CONVERT_ZERO;
+      }
+
+      this->uniforms++;
+   } else {
+      reg += ALIGN(uniforms, 2) / 2;
+   }
+
+   /* for now, we are not doing any elimination of unused slots, nor
+    * are we packing our uniforms.
+    */
+   c->prog_data.nr_params = this->uniforms * 4;
+
+   c->prog_data.curb_read_length = reg - 1;
+   c->prog_data.uses_new_param_layout = true;
+
+   return reg;
+}
+
+void
+vec4_visitor::setup_payload(void)
+{
+   int reg = 0;
+
+   /* The payload always contains important data in g0, which contains
+    * the URB handles that are passed on to the URB write at the end
+    * of the thread.  So, we always start push constants at g1.
+    */
+   reg++;
+
+   reg = setup_uniforms(reg);
+
+   reg = setup_attributes(reg);
 
    this->first_non_payload_grf = reg;
 }
@@ -174,6 +201,18 @@ vec4_instruction::get_src(int i)
       }
       break;
 
+   case UNIFORM:
+      brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2,
+				    ((src[i].reg + src[i].reg_offset) % 2) * 4),
+		       0, 4, 1);
+      brw_reg = retype(brw_reg, src[i].type);
+      brw_reg.dw1.bits.swizzle = src[i].swizzle;
+      if (src[i].abs)
+	 brw_reg = brw_abs(brw_reg);
+      if (src[i].negate)
+	 brw_reg = negate(brw_reg);
+      break;
+
    case HW_REG:
       brw_reg = src[i].fixed_hw_reg;
       break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index bba1d810f19..91abd40faad 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -22,7 +22,10 @@
  */
 
 #include "brw_vec4.h"
+extern "C" {
 #include "main/macros.h"
+#include "program/prog_parameter.h"
+}
 
 namespace brw {
 
@@ -306,6 +309,130 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
    this->type = brw_type_for_base_type(type);
 }
 
+/* Our support for uniforms is piggy-backed on the struct
+ * gl_fragment_program, because that's where the values actually
+ * get stored, rather than in some global gl_shader_program uniform
+ * store.
+ */
+int
+vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
+{
+   unsigned int offset = 0;
+   float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
+
+   if (type->is_matrix()) {
+      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+							type->vector_elements,
+							1);
+
+      for (unsigned int i = 0; i < type->matrix_columns; i++) {
+	 offset += setup_uniform_values(loc + offset, column);
+      }
+
+      return offset;
+   }
+
+   switch (type->base_type) {
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_BOOL:
+      for (unsigned int i = 0; i < type->vector_elements; i++) {
+	 int slot = this->uniforms * 4 + i;
+	 switch (type->base_type) {
+	 case GLSL_TYPE_FLOAT:
+	    c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
+	    break;
+	 case GLSL_TYPE_UINT:
+	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U;
+	    break;
+	 case GLSL_TYPE_INT:
+	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I;
+	    break;
+	 case GLSL_TYPE_BOOL:
+	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B;
+	    break;
+	 default:
+	    assert(!"not reached");
+	    c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
+	    break;
+	 }
+	 c->prog_data.param[slot] = &values[i];
+      }
+
+      for (unsigned int i = type->vector_elements; i < 4; i++) {
+	 c->prog_data.param_convert[this->uniforms * 4 + i] =
+	    PARAM_CONVERT_ZERO;
+	 c->prog_data.param[this->uniforms * 4 + i] = NULL;
+      }
+
+      this->uniform_size[this->uniforms] = type->vector_elements;
+      this->uniforms++;
+
+      return 1;
+
+   case GLSL_TYPE_STRUCT:
+      for (unsigned int i = 0; i < type->length; i++) {
+	 offset += setup_uniform_values(loc + offset,
+					type->fields.structure[i].type);
+      }
+      return offset;
+
+   case GLSL_TYPE_ARRAY:
+      for (unsigned int i = 0; i < type->length; i++) {
+	 offset += setup_uniform_values(loc + offset, type->fields.array);
+      }
+      return offset;
+
+   case GLSL_TYPE_SAMPLER:
+      /* The sampler takes up a slot, but we don't use any values from it. */
+      return 1;
+
+   default:
+      assert(!"not reached");
+      return 0;
+   }
+}
+
+/* Our support for builtin uniforms is even scarier than non-builtin.
+ * It sits on top of the PROG_STATE_VAR parameters that are
+ * automatically updated from GL context state.
+ */
+void
+vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
+{
+   const ir_state_slot *const slots = ir->state_slots;
+   assert(ir->state_slots != NULL);
+
+   for (unsigned int i = 0; i < ir->num_state_slots; i++) {
+      /* This state reference has already been setup by ir_to_mesa,
+       * but we'll get the same index back here.  We can reference
+       * ParameterValues directly, since unlike brw_fs.cpp, we never
+       * add new state references during compile.
+       */
+      int index = _mesa_add_state_reference(this->vp->Base.Parameters,
+					    (gl_state_index *)slots[i].tokens);
+      float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
+
+      this->uniform_size[this->uniforms] = 0;
+      /* Add each of the unique swizzled channels of the element.
+       * This will end up matching the size of the glsl_type of this field.
+       */
+      int last_swiz = -1;
+      for (unsigned int j = 0; j < 4; j++) {
+	 int swiz = GET_SWZ(slots[i].swizzle, j);
+	 if (swiz == last_swiz)
+	    break;
+	 last_swiz = swiz;
+
+	 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
+	 c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT;
+	 this->uniform_size[this->uniforms]++;
+      }
+      this->uniforms++;
+   }
+}
+
 dst_reg *
 vec4_visitor::variable_storage(ir_variable *var)
 {
@@ -496,13 +623,10 @@ vec4_visitor::visit(ir_variable *ir)
    switch (ir->mode) {
    case ir_var_in:
       reg = new(mem_ctx) dst_reg(ATTR, ir->location);
-      reg->type = brw_type_for_base_type(ir->type);
-      hash_table_insert(this->variable_ht, reg, ir);
       break;
 
    case ir_var_out:
       reg = new(mem_ctx) dst_reg(this, ir->type);
-      hash_table_insert(this->variable_ht, reg, ir);
 
       for (int i = 0; i < type_size(ir->type); i++) {
 	 output_reg[ir->location + i] = *reg;
@@ -512,14 +636,21 @@ vec4_visitor::visit(ir_variable *ir)
 
    case ir_var_temporary:
       reg = new(mem_ctx) dst_reg(this, ir->type);
-      hash_table_insert(this->variable_ht, reg, ir);
-
       break;
 
    case ir_var_uniform:
-      /* FINISHME: uniforms */
+      reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
+
+      if (!strncmp(ir->name, "gl_", 3)) {
+	 setup_builtin_uniform_values(ir);
+      } else {
+	 setup_uniform_values(ir->location, ir->type);
+      }
       break;
    }
+
+   reg->type = brw_type_for_base_type(ir->type);
+   hash_table_insert(this->variable_ht, reg, ir);
 }
 
 void
@@ -1606,6 +1737,7 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
    this->current_annotation = NULL;
 
    this->c = c;
+   this->vp = brw->vertex_program; /* FINISHME: change for precompile */
    this->prog_data = &c->prog_data;
 
    this->variable_ht = hash_table_ctor(0,
@@ -1615,6 +1747,12 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
    this->virtual_grf_sizes = NULL;
    this->virtual_grf_count = 0;
    this->virtual_grf_array_size = 0;
+
+   this->uniforms = 0;
+
+   this->variable_ht = hash_table_ctor(0,
+				       hash_table_pointer_hash,
+				       hash_table_pointer_compare);
 }
 
 vec4_visitor::~vec4_visitor()
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index fb4fb146f8d..ad909789d82 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -342,7 +342,7 @@ prepare_wm_pull_constants(struct brw_context *brw)
    constants = brw->wm.const_bo->virtual;
    for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
       constants[i] = convert_param(brw->wm.prog_data->pull_param_convert[i],
-				   *brw->wm.prog_data->pull_param[i]);
+				   brw->wm.prog_data->pull_param[i]);
    }
    drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);
 
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index e70454416bf..affa72c7324 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -81,12 +81,21 @@ gen6_prepare_vs_push_constants(struct brw_context *brw)
 	 params_uploaded++;
       }
 
-      for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
-	 if (brw->vs.constant_map[i] != -1) {
-	    memcpy(param + brw->vs.constant_map[i] * 4,
-		   vp->program.Base.Parameters->ParameterValues[i],
-		   4 * sizeof(float));
-	    params_uploaded++;
+      if (brw->vs.prog_data->uses_new_param_layout) {
+	 for (i = 0; i < brw->vs.prog_data->nr_params; i++) {
+	    *param = convert_param(brw->vs.prog_data->param_convert[i],
+				   brw->vs.prog_data->param[i]);
+	    param++;
+	 }
+	 params_uploaded += brw->vs.prog_data->nr_params / 4;
+      } else {
+	 for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+	    if (brw->vs.constant_map[i] != -1) {
+	       memcpy(param + brw->vs.constant_map[i] * 4,
+		      vp->program.Base.Parameters->ParameterValues[i],
+		      4 * sizeof(float));
+	       params_uploaded++;
+	    }
 	 }
       }
 
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 3d525248f25..07e9995f53b 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -61,7 +61,7 @@ gen6_prepare_wm_push_constants(struct brw_context *brw)
 
       for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
 	 constants[i] = convert_param(brw->wm.prog_data->param_convert[i],
-				      *brw->wm.prog_data->param[i]);
+				      brw->wm.prog_data->param[i]);
       }
 
       if (0) {
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index a102ca772b3..1d80e96778e 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -58,7 +58,7 @@ gen7_prepare_wm_constants(struct brw_context *brw)
 
       for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
 	 constants[i] = convert_param(brw->wm.prog_data->param_convert[i],
-				      *brw->wm.prog_data->param[i]);
+				      brw->wm.prog_data->param[i]);
       }
 
       if (0) {

From 83d5850518388202c5589d3181b84fb54c213fb1 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 16:18:00 -0700
Subject: [PATCH 358/600] i965/vs: Fix constant vector construction.

Fixes some issues noticed in glsl-vs-all-01.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 91abd40faad..8ee4884098c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1391,10 +1391,10 @@ vec4_visitor::visit(ir_constant *ir)
       return;
    }
 
-   for (int i = 0; i < ir->type->vector_elements; i++) {
-      this->result = src_reg(this, ir->type);
-      dst_reg dst = dst_reg(this->result);
+   this->result = src_reg(this, ir->type);
+   dst_reg dst = dst_reg(this->result);
 
+   for (int i = 0; i < ir->type->vector_elements; i++) {
       dst.writemask = 1 << i;
 
       switch (ir->type->base_type) {

From 4a4857246c79c42d918a84d7e28e9afff3a9ef6d Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 16:23:42 -0700
Subject: [PATCH 359/600] i965/vs: Port the fix for clip plane writemasks from
 brw_vs_emit.c.

---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 8ee4884098c..439969ab7ea 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1618,7 +1618,7 @@ vec4_visitor::emit_vue_header_gen6(int header_mrf)
 	    m = brw_message_reg(header_mrf + 1);
 
 	 emit(BRW_OPCODE_DP4,
-	      dst_reg(brw_writemask(m, 1 << (i & 7))),
+	      dst_reg(brw_writemask(m, 1 << (i & 3))),
 	      src_reg(c->userplane[i]));
       }
       header_mrf += 2;

From c0f334a3ed3c6645abd1812e39cd52f1dfa32fa1 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 16:29:48 -0700
Subject: [PATCH 360/600] i965/vs: Don't emit an extra copy of the vertex
 position.

Fixes glsl-vs-abs-neg, glsl-vs-all-01, and probably many other tests.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 439969ab7ea..c4a3c8a8667 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1550,7 +1550,7 @@ vec4_visitor::emit_vue_header_gen4(int header_mrf)
        * dword 8-11 (m3) of the vertex header is the 4D space position
        * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
        * m6 is a pad so that the vertex element data is aligned
-       * m7 is the first vertex data we fill, which is the vertex position.
+       * m7 is the first vertex data we fill.
        */
       current_annotation = "NDC";
       emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
@@ -1561,15 +1561,14 @@ vec4_visitor::emit_vue_header_gen4(int header_mrf)
       /* user clip distance. */
       header_mrf += 2;
 
-      /* Pad so that vertex element data (starts with position) is aligned. */
+      /* Pad so that vertex element data is aligned. */
       header_mrf++;
    } else {
       /* There are 8 dwords in VUE header pre-Ironlake:
        * dword 0-3 (m1) is indices, point width, clip flags.
        * dword 4-7 (m2) is ndc position (set above)
        *
-       * dword 8-11 (m3) is the first vertex data, which we always have be the
-       * vertex position.
+       * dword 8-11 (m3) is the first vertex data.
        */
       current_annotation = "NDC";
       emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
@@ -1592,8 +1591,7 @@ vec4_visitor::emit_vue_header_gen6(int header_mrf)
     * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
     * enabled.
     *
-    * m4 or 6 is the first vertex element data we fill, which is
-    * the vertex position.
+    * m4 or 6 is the first vertex element data we fill.
     */
 
    current_annotation = "indices, point width, clip flags";
@@ -1681,6 +1679,10 @@ vec4_visitor::emit_urb_writes()
       if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
 	 continue;
 
+      /* This is set up in the VUE header. */
+      if (attr == VERT_RESULT_HPOS)
+	 continue;
+
       /* This is loaded into the VUE header, and thus doesn't occupy
        * an attribute slot.
        */

From 82aa9299fbfe92d2526fa9f7ffd2a1ebc7827ee9 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 16:31:30 -0700
Subject: [PATCH 361/600] i965/vs: Allow scalar values in assignments, too.

Fixes glsl-vs-all-02 and many other tests.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index c4a3c8a8667..e3779ab0444 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1285,7 +1285,8 @@ vec4_visitor::visit(ir_assignment *ir)
    int first_enabled_chan = 0;
    int src_chan = 0;
 
-   assert(ir->lhs->type->is_vector());
+   assert(ir->lhs->type->is_vector() ||
+	  ir->lhs->type->is_scalar());
    dst.writemask = ir->write_mask;
 
    for (int i = 0; i < 4; i++) {

From 78fac1892a3a7a90eb7baf78903d70649028d27a Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 16:35:24 -0700
Subject: [PATCH 362/600] i965/vs: Allocate storage for "auto" variables just
 like temps.

Fixes segfault in glsl-vs-cross-2.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index e3779ab0444..3e62c9ebba8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -634,6 +634,7 @@ vec4_visitor::visit(ir_variable *ir)
       }
       break;
 
+   case ir_var_auto:
    case ir_var_temporary:
       reg = new(mem_ctx) dst_reg(this, ir->type);
       break;
@@ -647,6 +648,9 @@ vec4_visitor::visit(ir_variable *ir)
 	 setup_uniform_values(ir->location, ir->type);
       }
       break;
+
+   default:
+      assert(!"not reached");
    }
 
    reg->type = brw_type_for_base_type(ir->type);

From bb468fc1ede9b0a5231ebfaa51df444502d33654 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 16:37:18 -0700
Subject: [PATCH 363/600] i965/vs: Fix ir_swizzle handling.

I decided to refactor it a bit in adapting ir_to_mesa.cpp code, and
mangled it.  Fixes glsl-vs-cross-2.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 3e62c9ebba8..4f2a2011068 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1109,7 +1109,7 @@ vec4_visitor::visit(ir_swizzle *ir)
    src = this->result;
    assert(src.file != BAD_FILE);
 
-   if (i < ir->type->vector_elements) {
+   for (i = 0; i < ir->type->vector_elements; i++) {
       switch (i) {
       case 0:
 	 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);

From aa753c5a14637ede804e8043762693122174bf8c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 19:05:42 -0700
Subject: [PATCH 364/600] i965/vs: Disable loops for now until rendering is
 generally correct.

---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 4f2a2011068..c3b55db4ac1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -662,6 +662,8 @@ vec4_visitor::visit(ir_loop *ir)
 {
    ir_dereference_variable *counter = NULL;
 
+   fail("not yet\n");
+
    /* We don't want debugging output to print the whole body of the
     * loop as the annotation.
     */

From 164ccd27787e0df4ae6f85a7178aff0720d56ac9 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 19:12:16 -0700
Subject: [PATCH 365/600] i965/vs: Fix support for "IF" instructions by copying
 brw_fs_visitor.cpp.

Fixes glsl-vs-if-greater.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index c3b55db4ac1..014f7e62284 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1451,12 +1451,18 @@ vec4_visitor::visit(ir_discard *ir)
 void
 vec4_visitor::visit(ir_if *ir)
 {
+   /* Don't point the annotation at the if statement, because then it plus
+    * the then and else blocks get printed.
+    */
    this->base_ir = ir->condition;
-   ir->condition->accept(this);
-   assert(this->result.file != BAD_FILE);
 
-   /* FINISHME: condcode */
-   emit(BRW_OPCODE_IF);
+   if (intel->gen == 6) {
+      emit_if_gen6(ir);
+   } else {
+      emit_bool_to_cond_code(ir->condition);
+      vec4_instruction *inst = emit(BRW_OPCODE_IF);
+      inst->predicate = BRW_PREDICATE_NORMAL;
+   }
 
    visit_instructions(&ir->then_instructions);
 

From eca762d831e099b549dafa0be896eac82b3fceb9 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 19:18:31 -0700
Subject: [PATCH 366/600] i965/vs: Fix support for zero uniforms in use.

We were looking for attributes in the wrong place, and pointlessly
doing the work on gen6 at all.
---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 1f2853e1118..be089369bcf 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -94,7 +94,7 @@ vec4_visitor::setup_uniforms(int reg)
    /* The pre-gen6 VS requires that some push constants get loaded no
     * matter what, or the GPU would hang.
     */
-   if (this->uniforms == 0) {
+   if (intel->gen < 6 && this->uniforms == 0) {
       this->uniform_size[this->uniforms] = 1;
 
       for (unsigned int i = 0; i < 4; i++) {
@@ -105,6 +105,7 @@ vec4_visitor::setup_uniforms(int reg)
       }
 
       this->uniforms++;
+      reg++;
    } else {
       reg += ALIGN(uniforms, 2) / 2;
    }

From e5363c7fd2ed6318e86ba4a62adc0c2377e51eef Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 19:29:41 -0700
Subject: [PATCH 367/600] i965/vs: Use an appropriate swizzle on src regs from
 variables.

Fixes glsl-vs-if-bool.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 014f7e62284..734e2514536 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1140,6 +1140,7 @@ vec4_visitor::visit(ir_swizzle *ir)
 void
 vec4_visitor::visit(ir_dereference_variable *ir)
 {
+   const struct glsl_type *type = ir->type;
    dst_reg *reg = variable_storage(ir->var);
 
    if (!reg) {
@@ -1149,6 +1150,9 @@ vec4_visitor::visit(ir_dereference_variable *ir)
    }
 
    this->result = src_reg(*reg);
+
+   if (type->is_scalar() || type->is_vector() || type->is_matrix())
+      this->result.swizzle = swizzle_for_size(type->vector_elements);
 }
 
 void

From 814a9bef30beda427e8fbf6f3b8abb6a45f0e2e4 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 19:31:53 -0700
Subject: [PATCH 368/600] i965/vs: Drop the assertion about dst.reg_offset ==
 0.

Adding the offset is the right thing to do here, and fixes
glsl-vs-mat-add-1.
---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index be089369bcf..a41c58c7d52 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -146,7 +146,6 @@ vec4_instruction::get_dst(void)
 
    switch (dst.file) {
    case GRF:
-      assert(dst.reg_offset == 0);
       brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
       brw_reg = retype(brw_reg, dst.type);
       brw_reg.dw1.bits.writemask = dst.writemask;

From 8e947c2546c25c0dfa93b538e54113af1bf582df Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 19:38:44 -0700
Subject: [PATCH 369/600] i965/vs: Fix the types of array/struct dereferences.

Fixes glsl-vs-arrays-3.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 734e2514536..b6f3cbc265f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1198,6 +1198,7 @@ vec4_visitor::visit(ir_dereference_array *ir)
       src.swizzle = swizzle_for_size(ir->type->vector_elements);
    else
       src.swizzle = BRW_SWIZZLE_NOOP;
+   src.type = brw_type_for_base_type(ir->type);
 
    this->result = src;
 }
@@ -1222,6 +1223,7 @@ vec4_visitor::visit(ir_dereference_record *ir)
       this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
    else
       this->result.swizzle = BRW_SWIZZLE_NOOP;
+   this->result.type = brw_type_for_base_type(ir->type);
 
    this->result.reg_offset += offset;
 }

From c3752b399ab376aa53392afb8f2d4b526054f0a8 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 19:40:46 -0700
Subject: [PATCH 370/600] i965/vs: Add support for dot product opcodes.

Fixes glsl-vs-dot-vec2.
---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index a41c58c7d52..71caf907b38 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -447,6 +447,18 @@ vec4_visitor::generate_code()
 	 brw_SEL(p, dst, src[0], src[1]);
 	 break;
 
+      case BRW_OPCODE_DP4:
+	 brw_DP4(p, dst, src[0], src[1]);
+	 break;
+
+      case BRW_OPCODE_DP3:
+	 brw_DP3(p, dst, src[0], src[1]);
+	 break;
+
+      case BRW_OPCODE_DP2:
+	 brw_DP2(p, dst, src[0], src[1]);
+	 break;
+
       case BRW_OPCODE_IF:
 	 if (inst->src[0].file != BAD_FILE) {
 	    /* The instruction has an embedded compare (only allowed on gen6) */

From 2b7632aeaa5f8b4ab3da7d33a3c71c71023a072a Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 20:03:31 -0700
Subject: [PATCH 371/600] i965/vs: Add support for if(any_nequal()) and
 if(all_equal()) on gen6.

Fixes vs-temp-array-mat2-col-rd.shader_test.
---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp   |  3 ++-
 .../drivers/dri/i965/brw_vec4_visitor.cpp     | 23 ++++++++++++++++---
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 71caf907b38..bc3110b0458 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -465,7 +465,8 @@ vec4_visitor::generate_code()
 	    assert(intel->gen == 6);
 	    gen6_IF(p, inst->conditional_mod, src[0], src[1]);
 	 } else {
-	    brw_IF(p, BRW_EXECUTE_8);
+	    struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8);
+	    brw_inst->header.predicate_control = inst->predicate;
 	 }
 	 if_depth_in_loop[loop_stack_depth]++;
 	 break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index b6f3cbc265f..4237373c13d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -543,7 +543,9 @@ vec4_visitor::emit_if_gen6(ir_if *ir)
 
       assert(expr->get_num_operands() <= 2);
       for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
-	 assert(expr->operands[i]->type->is_scalar());
+	 assert(expr->operands[i]->type->is_scalar() ||
+		expr->operation == ir_binop_any_nequal ||
+		expr->operation == ir_binop_all_equal);
 
 	 expr->operands[i]->accept(this);
 	 op[i] = this->result;
@@ -589,13 +591,28 @@ vec4_visitor::emit_if_gen6(ir_if *ir)
       case ir_binop_less:
       case ir_binop_lequal:
       case ir_binop_equal:
-      case ir_binop_all_equal:
       case ir_binop_nequal:
-      case ir_binop_any_nequal:
 	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
 	 inst->conditional_mod =
 	    brw_conditional_for_comparison(expr->operation);
 	 return;
+
+      case ir_binop_all_equal:
+	 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_Z;
+
+	 inst = emit(BRW_OPCODE_IF);
+	 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
+	 return;
+
+      case ir_binop_any_nequal:
+	 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+	 inst = emit(BRW_OPCODE_IF);
+	 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+	 return;
+
       default:
 	 assert(!"not reached");
 	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));

From cda28bca0d789c328d19bf90afd35a5ff74cfb77 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 20:16:21 -0700
Subject: [PATCH 372/600] i965/vs: Apply the gen6 math workaround for math1
 instructions.

Fixes glsl-vs-masked-cos.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 4237373c13d..7e0535b5c02 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -122,14 +122,12 @@ vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
 {
    /* The gen6 math instruction ignores the source modifiers --
     * swizzle, abs, negate, and at least some parts of the register
-    * region description.  Move the source to the corresponding slots
-    * of the destination generally work.
+    * region description.
     */
-   src_reg expanded = src_reg(this, glsl_type::float_type);
-   emit(BRW_OPCODE_MOV, dst, src);
-   src = expanded;
+   src_reg temp_src = src_reg(this, glsl_type::vec4_type);
+   emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);
 
-   emit(opcode, dst, src);
+   emit(opcode, dst, temp_src);
 }
 
 void

From 930afd1774bdcd013bccbd7b5717ae0bb8e3dea3 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 20:26:48 -0700
Subject: [PATCH 373/600] i965/vs: Don't forget to set up assignment condition
 code for arrays/structs.

Fixes vs-uniform-array-mat2-index-col-rd.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 7e0535b5c02..27620c47085 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1276,6 +1276,10 @@ vec4_visitor::emit_block_move(ir_assignment *ir)
 
    dst_reg dst = get_assignment_lhs(ir->lhs, this);
 
+   if (ir->condition) {
+      emit_bool_to_cond_code(ir->condition);
+   }
+
    /* FINISHME: This should really set to the correct maximal writemask for each
     * FINISHME: component written (in the loops below).
     */

From 979072613139870f12e329e4b483c7f688b40560 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 20:46:03 -0700
Subject: [PATCH 374/600] i965/vs: Handle assignment of
 structures/arrays/matrices better.

This gets the right types on the instructions, as well as emitting
minimal swizzles/writemasks.
---
 src/mesa/drivers/dri/i965/brw_vec4.h          |  3 +-
 .../drivers/dri/i965/brw_vec4_visitor.cpp     | 75 +++++++++++++------
 2 files changed, 54 insertions(+), 24 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 01058243f04..1619c2e1ef6 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -390,7 +390,8 @@ public:
    void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1);
    void emit_if_gen6(ir_if *ir);
 
-   void emit_block_move(ir_assignment *ir);
+   void emit_block_move(dst_reg *dst, src_reg *src,
+			const struct glsl_type *type, bool predicated);
 
    /**
     * Emit the correct dot-product instruction for the type of arguments
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 27620c47085..4f7763d61bb 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1269,38 +1269,69 @@ get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
 }
 
 void
-vec4_visitor::emit_block_move(ir_assignment *ir)
+vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
+			      const struct glsl_type *type, bool predicated)
 {
-   ir->rhs->accept(this);
-   src_reg src = this->result;
-
-   dst_reg dst = get_assignment_lhs(ir->lhs, this);
-
-   if (ir->condition) {
-      emit_bool_to_cond_code(ir->condition);
+   if (type->base_type == GLSL_TYPE_STRUCT) {
+      for (unsigned int i = 0; i < type->length; i++) {
+	 emit_block_move(dst, src, type->fields.structure[i].type, predicated);
+      }
+      return;
    }
 
-   /* FINISHME: This should really set to the correct maximal writemask for each
-    * FINISHME: component written (in the loops below).
-    */
-   dst.writemask = WRITEMASK_XYZW;
-
-   for (int i = 0; i < type_size(ir->lhs->type); i++) {
-      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);
-      if (ir->condition)
-	 inst->predicate = BRW_PREDICATE_NORMAL;
-
-      dst.reg_offset++;
-      src.reg_offset++;
+   if (type->is_array()) {
+      for (unsigned int i = 0; i < type->length; i++) {
+	 emit_block_move(dst, src, type->fields.array, predicated);
+      }
+      return;
    }
+
+   if (type->is_matrix()) {
+      const struct glsl_type *vec_type;
+
+      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+					 type->vector_elements, 1);
+
+      for (int i = 0; i < type->matrix_columns; i++) {
+	 emit_block_move(dst, src, vec_type, predicated);
+      }
+      return;
+   }
+
+   assert(type->is_scalar() || type->is_vector());
+
+   dst->type = brw_type_for_base_type(type);
+   src->type = dst->type;
+
+   dst->writemask = (1 << type->vector_elements) - 1;
+
+   /* Do we need to worry about swizzling a swizzle? */
+   assert(src->swizzle = BRW_SWIZZLE_NOOP);
+   src->swizzle = swizzle_for_size(type->vector_elements);
+
+   vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src);
+   if (predicated)
+      inst->predicate = BRW_PREDICATE_NORMAL;
+
+   dst->reg_offset++;
+   src->reg_offset++;
 }
 
 void
 vec4_visitor::visit(ir_assignment *ir)
 {
+   dst_reg dst = get_assignment_lhs(ir->lhs, this);
+
    if (!ir->lhs->type->is_scalar() &&
        !ir->lhs->type->is_vector()) {
-      emit_block_move(ir);
+      ir->rhs->accept(this);
+      src_reg src = this->result;
+
+      if (ir->condition) {
+	 emit_bool_to_cond_code(ir->condition);
+      }
+
+      emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL);
       return;
    }
 
@@ -1310,8 +1341,6 @@ vec4_visitor::visit(ir_assignment *ir)
    ir->rhs->accept(this);
    src_reg src = this->result;
 
-   dst_reg dst = get_assignment_lhs(ir->lhs, this);
-
    int swizzles[4];
    int first_enabled_chan = 0;
    int src_chan = 0;

From aba9801996f2f524a765df378c234a7645b3a5d1 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 20:54:25 -0700
Subject: [PATCH 375/600] i965/vs: Fix types of varying outputs.

For structs/arrays/matrices, they were ending up as uint because we
forgot to set them.  All varyings in GLSL 1.20 are of base type float,
so just force the matter here (which gets inherited at
emit_urb_writes() time).

Fixes vs-varying-array-mat2-col-rd.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 4f7763d61bb..5e2b3e5a5fe 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -646,6 +646,7 @@ vec4_visitor::visit(ir_variable *ir)
       for (int i = 0; i < type_size(ir->type); i++) {
 	 output_reg[ir->location + i] = *reg;
 	 output_reg[ir->location + i].reg_offset = i;
+	 output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F;
       }
       break;
 

From 31ef2e3ec2f5837eea0899b4bda5ea15e335a6a2 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 21:22:36 -0700
Subject: [PATCH 376/600] i965/vs: Avoid generating extra moves when setting up
 large ir_constants.

We were also screwing up the types in the process, and just not
emitting moves was easier.
---
 src/mesa/drivers/dri/i965/brw_vec4.h          |  2 +
 .../drivers/dri/i965/brw_vec4_visitor.cpp     | 77 +++++++------------
 2 files changed, 28 insertions(+), 51 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 1619c2e1ef6..3e457fc61aa 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -393,6 +393,8 @@ public:
    void emit_block_move(dst_reg *dst, src_reg *src,
 			const struct glsl_type *type, bool predicated);
 
+   void emit_constant_values(dst_reg *dst, ir_constant *value);
+
    /**
     * Emit the correct dot-product instruction for the type of arguments
     */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 5e2b3e5a5fe..3562779413f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1387,96 +1387,71 @@ vec4_visitor::visit(ir_assignment *ir)
    }
 }
 
-
 void
-vec4_visitor::visit(ir_constant *ir)
+vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
 {
    if (ir->type->base_type == GLSL_TYPE_STRUCT) {
-      src_reg temp_base = src_reg(this, ir->type);
-      dst_reg temp = dst_reg(temp_base);
+      foreach_list(node, &ir->components) {
+	 ir_constant *field_value = (ir_constant *)node;
 
-      foreach_iter(exec_list_iterator, iter, ir->components) {
-	 ir_constant *field_value = (ir_constant *)iter.get();
-	 int size = type_size(field_value->type);
-
-	 assert(size > 0);
-
-	 field_value->accept(this);
-	 src_reg src = this->result;
-
-	 for (int i = 0; i < (unsigned int)size; i++) {
-	    emit(BRW_OPCODE_MOV, temp, src);
-
-	    src.reg_offset++;
-	    temp.reg_offset++;
-	 }
+	 emit_constant_values(dst, field_value);
       }
-      this->result = temp_base;
       return;
    }
 
    if (ir->type->is_array()) {
-      src_reg temp_base = src_reg(this, ir->type);
-      dst_reg temp = dst_reg(temp_base);
-      int size = type_size(ir->type->fields.array);
-
-      assert(size > 0);
-
       for (unsigned int i = 0; i < ir->type->length; i++) {
-	 ir->array_elements[i]->accept(this);
-	 src_reg src = this->result;
-	 for (int j = 0; j < size; j++) {
-	    emit(BRW_OPCODE_MOV, temp, src);
-
-	    src.reg_offset++;
-	    temp.reg_offset++;
-	 }
+	 emit_constant_values(dst, ir->array_elements[i]);
       }
-      this->result = temp_base;
       return;
    }
 
    if (ir->type->is_matrix()) {
-      this->result = src_reg(this, ir->type);
-      dst_reg dst = dst_reg(this->result);
-
-      assert(ir->type->base_type == GLSL_TYPE_FLOAT);
-
       for (int i = 0; i < ir->type->matrix_columns; i++) {
 	 for (int j = 0; j < ir->type->vector_elements; j++) {
-	    dst.writemask = 1 << j;
-	    emit(BRW_OPCODE_MOV, dst,
+	    dst->writemask = 1 << j;
+	    dst->type = BRW_REGISTER_TYPE_F;
+
+	    emit(BRW_OPCODE_MOV, *dst,
 		 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
 	 }
-	 dst.reg_offset++;
+	 dst->reg_offset++;
       }
       return;
    }
 
-   this->result = src_reg(this, ir->type);
-   dst_reg dst = dst_reg(this->result);
-
    for (int i = 0; i < ir->type->vector_elements; i++) {
-      dst.writemask = 1 << i;
+      dst->writemask = 1 << i;
+      dst->type = brw_type_for_base_type(ir->type);
 
       switch (ir->type->base_type) {
       case GLSL_TYPE_FLOAT:
-	 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.f[i]));
+	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i]));
 	 break;
       case GLSL_TYPE_INT:
-	 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.i[i]));
+	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i]));
 	 break;
       case GLSL_TYPE_UINT:
-	 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.u[i]));
+	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i]));
 	 break;
       case GLSL_TYPE_BOOL:
-	 emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.b[i]));
+	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i]));
 	 break;
       default:
 	 assert(!"Non-float/uint/int/bool constant");
 	 break;
       }
    }
+   dst->reg_offset++;
+}
+
+void
+vec4_visitor::visit(ir_constant *ir)
+{
+   dst_reg dst = dst_reg(this, ir->type);
+   this->result = src_reg(dst);
+
+   emit_constant_values(&dst, ir);
 }
 
 void

From 160a5a3ff0fc826a2978c6bea6de21b445f612e9 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Aug 2011 21:53:00 -0700
Subject: [PATCH 377/600] i965/vs: Add support for VUEs larger than a single
 URB write.

Fixes glsl-max-varyings.
---
 .../drivers/dri/i965/brw_vec4_visitor.cpp     | 42 +++++++++++++++----
 1 file changed, 34 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 3562779413f..f90025c8e7e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1703,6 +1703,7 @@ vec4_visitor::emit_urb_writes()
    int base_mrf = 1;
    int mrf = base_mrf;
    int urb_entry_size;
+   uint64_t outputs_remaining = c->prog_data.outputs_written;
 
    /* FINISHME: edgeflag */
 
@@ -1717,11 +1718,14 @@ vec4_visitor::emit_urb_writes()
       mrf = emit_vue_header_gen4(mrf);
    }
 
+   /* Set up the VUE data for the first URB write */
    int attr;
    for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
       if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
 	 continue;
 
+      outputs_remaining &= ~BITFIELD64_BIT(attr);
+
       /* This is set up in the VUE header. */
       if (attr == VERT_RESULT_HPOS)
 	 continue;
@@ -1734,27 +1738,49 @@ vec4_visitor::emit_urb_writes()
 
       emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
 
-      /* If this is MRF 15, we can't fit anything more into this URB
+      /* If this was MRF 15, we can't fit anything more into this URB
        * WRITE.  Note that base_mrf of 1 means that MRF 15 is an
        * even-numbered amount of URB write data, which will meet
        * gen6's requirements for length alignment.
        */
-      if (mrf == 15)
+      if (mrf == 16) {
+	 attr++;
 	 break;
+      }
    }
 
    vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
    inst->base_mrf = base_mrf;
    inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
-   inst->eot = true;
+   inst->eot = !outputs_remaining;
 
    urb_entry_size = mrf - base_mrf;
 
-   for (; attr < VERT_RESULT_MAX; attr++) {
-      if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
-	 continue;
-      fail("Second URB write not supported.\n");
-      break;
+   /* Optional second URB write */
+   if (outputs_remaining) {
+      mrf = base_mrf + 1;
+
+      for (; attr < VERT_RESULT_MAX; attr++) {
+	 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
+	    continue;
+
+	 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
+
+	 assert(mrf != 16);
+      }
+
+      inst = emit(VS_OPCODE_URB_WRITE);
+      inst->base_mrf = base_mrf;
+      inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
+      inst->eot = true;
+      /* URB destination offset.  In the previous write, we got MRFs 2-
+       * 15 MRFs minus the one header MRF, so 14 regs.  URB offset is in
+       * URB row increments, and each of our MRFs is half of one of
+       * those, since we're doing interleaved writes.
+       */
+      inst->offset = 14 / 2;
+
+      urb_entry_size += mrf - base_mrf;
    }
 
    if (intel->gen == 6)

From e355b179b2bd42a585464f17759764083fa3ef26 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 7 Aug 2011 10:43:49 -0700
Subject: [PATCH 378/600] i965: Remove dead brw->wm.max_threads field.

---
 src/mesa/drivers/dri/i965/brw_context.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 4a1abd6252e..38b13098bc0 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -735,7 +735,6 @@ struct brw_context
       GLuint render_surf;
       GLuint nr_surfaces;      
 
-      GLuint max_threads;
       drm_intel_bo *scratch_bo;
 
       GLuint sampler_count;

From 2b224d66a01f3ce867fb05558b25749705bbfe7a Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 7 Aug 2011 10:44:15 -0700
Subject: [PATCH 379/600] i965: Set up allocation of a VS scratch space if
 required.

---
 src/mesa/drivers/dri/i965/brw_context.h |  6 ++++++
 src/mesa/drivers/dri/i965/brw_program.c | 28 +++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vs.c      |  9 ++++++++
 src/mesa/drivers/dri/i965/brw_vs.h      |  1 +
 src/mesa/drivers/dri/i965/brw_wm.c      | 25 +++-------------------
 5 files changed, 47 insertions(+), 22 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 38b13098bc0..add8c568795 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -312,6 +312,7 @@ struct brw_vs_prog_data {
    GLuint total_grf;
    GLbitfield64 outputs_written;
    GLuint nr_params;       /**< number of float params/constants */
+   GLuint total_scratch;
 
    GLuint inputs_read;
 
@@ -671,6 +672,7 @@ struct brw_context
       struct brw_vs_prog_data *prog_data;
       int8_t *constant_map; /* variable array following prog_data */
 
+      drm_intel_bo *scratch_bo;
       drm_intel_bo *const_bo;
       /** Offset in the program cache to the VS program */
       uint32_t prog_offset;
@@ -858,6 +860,10 @@ void brw_validate_textures( struct brw_context *brw );
  */
 void brwInitFragProgFuncs( struct dd_function_table *functions );
 
+int brw_get_scratch_size(int size);
+void brw_get_scratch_bo(struct intel_context *intel,
+			drm_intel_bo **scratch_bo, int size);
+
 
 /* brw_urb.c
  */
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 6674f1640c8..09b5be4c96e 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -226,6 +226,34 @@ static GLboolean brwProgramStringNotify( struct gl_context *ctx,
    return GL_TRUE;
 }
 
+/* Per-thread scratch space is a power-of-two multiple of 1KB. */
+int
+brw_get_scratch_size(int size)
+{
+   int i;
+
+   for (i = 1024; i < size; i *= 2)
+      ;
+
+   return i;
+}
+
+void
+brw_get_scratch_bo(struct intel_context *intel,
+		   drm_intel_bo **scratch_bo, int size)
+{
+   drm_intel_bo *old_bo = *scratch_bo;
+
+   if (old_bo && old_bo->size < size) {
+      drm_intel_bo_unreference(old_bo);
+      old_bo = NULL;
+   }
+
+   if (!old_bo) {
+      *scratch_bo = drm_intel_bo_alloc(intel->bufmgr, "scratch bo", size, 4096);
+   }
+}
+
 void brwInitFragProgFuncs( struct dd_function_table *functions )
 {
    assert(functions->ProgramStringNotify == _tnl_program_string); 
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index bd0677db151..d389f602fba 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -45,6 +45,7 @@ static void do_vs_prog( struct brw_context *brw,
 			struct brw_vs_prog_key *key )
 {
    struct gl_context *ctx = &brw->intel.ctx;
+   struct intel_context *intel = &brw->intel;
    GLuint program_size;
    const GLuint *program;
    struct brw_vs_compile c;
@@ -97,6 +98,14 @@ static void do_vs_prog( struct brw_context *brw,
       brw_old_vs_emit(&c);
    }
 
+   /* Scratch space is used for register spilling */
+   if (c.last_scratch) {
+      c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch);
+
+      brw_get_scratch_bo(intel, &brw->vs.scratch_bo,
+			 c.prog_data.total_scratch * brw->vs_max_threads);
+   }
+
    /* get the program
     */
    program = brw_get_program(&c.func, &program_size);
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 9f9fed33970..83a37f5b800 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -66,6 +66,7 @@ struct brw_vs_compile {
    GLuint first_output;
    GLuint nr_outputs;
    GLuint first_overflow_output; /**< VERT_ATTRIB_x */
+   GLuint last_scratch;
 
    GLuint first_tmp;
    GLuint last_tmp;
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index d13ac6124c8..a4524fc7889 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -244,29 +244,10 @@ bool do_wm_prog(struct brw_context *brw,
 
    /* Scratch space is used for register spilling */
    if (c->last_scratch) {
-      uint32_t total_scratch;
+      c->prog_data.total_scratch = brw_get_scratch_size(c->last_scratch);
 
-      /* Per-thread scratch space is power-of-two sized. */
-      for (c->prog_data.total_scratch = 1024;
-	   c->prog_data.total_scratch <= c->last_scratch;
-	   c->prog_data.total_scratch *= 2) {
-	 /* empty */
-      }
-      total_scratch = c->prog_data.total_scratch * brw->wm_max_threads;
-
-      if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) {
-	 drm_intel_bo_unreference(brw->wm.scratch_bo);
-	 brw->wm.scratch_bo = NULL;
-      }
-      if (brw->wm.scratch_bo == NULL) {
-	 brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
-						 "wm scratch",
-						 total_scratch,
-						 4096);
-      }
-   }
-   else {
-      c->prog_data.total_scratch = 0;
+      brw_get_scratch_bo(intel, &brw->vs.scratch_bo,
+			 c->prog_data.total_scratch * brw->wm_max_threads);
    }
 
    if (unlikely(INTEL_DEBUG & DEBUG_WM))

From 314c2574ff6e562a6cfc5fb84980f092e495a948 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 7 Aug 2011 10:47:54 -0700
Subject: [PATCH 380/600] i965: Add remaining scratch space setup emit to unit
 states.

---
 src/mesa/drivers/dri/i965/brw_vs_state.c  | 10 ++++++++++
 src/mesa/drivers/dri/i965/gen6_vs_state.c | 10 +++++++++-
 src/mesa/drivers/dri/i965/gen7_vs_state.c | 10 +++++++++-
 src/mesa/drivers/dri/i965/gen7_wm_state.c |  8 +++++++-
 4 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index fc4373ab311..29b3e47ab0c 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -77,6 +77,16 @@ brw_prepare_vs_unit(struct brw_context *brw)
    else
       vs->thread1.binding_table_entry_count = brw->vs.nr_surfaces;
 
+   if (brw->vs.prog_data->total_scratch != 0) {
+      vs->thread2.scratch_space_base_pointer =
+	 brw->vs.scratch_bo->offset >> 10; /* reloc */
+      vs->thread2.per_thread_scratch_space =
+	 ffs(brw->vs.prog_data->total_scratch) - 11;
+   } else {
+      vs->thread2.scratch_space_base_pointer = 0;
+      vs->thread2.per_thread_scratch_space = 0;
+   }
+
    vs->thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length;
    vs->thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length;
    vs->thread3.dispatch_grf_start_reg = 1;
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index affa72c7324..b94121e8437 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -160,7 +160,15 @@ upload_vs_state(struct brw_context *brw)
    OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
 	     GEN6_VS_FLOATING_POINT_MODE_ALT |
 	     (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
-   OUT_BATCH(0); /* scratch space base offset */
+
+   if (brw->vs.prog_data->total_scratch) {
+      OUT_RELOC(brw->vs.scratch_bo,
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		ffs(brw->vs.prog_data->total_scratch) - 11);
+   } else {
+      OUT_BATCH(0);
+   }
+
    OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
 	     (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
 	     (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index 0fad3d2fb68..f3cd5d15bf0 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -71,7 +71,15 @@ upload_vs_state(struct brw_context *brw)
    OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
 	     GEN6_VS_FLOATING_POINT_MODE_ALT |
 	     (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
-   OUT_BATCH(0); /* scratch space base offset */
+
+   if (brw->vs.prog_data->total_scratch) {
+      OUT_RELOC(brw->vs.scratch_bo,
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		ffs(brw->vs.prog_data->total_scratch) - 11);
+   } else {
+      OUT_BATCH(0);
+   }
+
    OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
 	     (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
 	     (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index 1d80e96778e..55a603e887a 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -228,7 +228,13 @@ upload_ps_state(struct brw_context *brw)
    OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
    OUT_BATCH(brw->wm.prog_offset);
    OUT_BATCH(dw2);
-   OUT_BATCH(0); /* scratch space base offset */
+   if (brw->wm.prog_data->total_scratch) {
+      OUT_RELOC(brw->wm.scratch_bo,
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		ffs(brw->wm.prog_data->total_scratch) - 11);
+   } else {
+      OUT_BATCH(0);
+   }
    OUT_BATCH(dw4);
    OUT_BATCH(dw5);
    OUT_BATCH(0); /* kernel 1 pointer */

From 1ff4f11dd94711a498cde0330101c58636ef2741 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 7 Aug 2011 10:59:39 -0700
Subject: [PATCH 381/600] i965/vs: Track the variable index of array accesses.

This isn't used currently, as we lower all array accesses.
---
 src/mesa/drivers/dri/i965/brw_vec4.h           |  4 ++++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 16 ++++++++++++----
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 3e457fc61aa..bb40c71e4c9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -169,6 +169,8 @@ public:
    GLuint swizzle; /**< SWIZZLE_XYZW swizzles from Mesa. */
    bool negate;
    bool abs;
+
+   src_reg *reladdr;
 };
 
 class dst_reg : public reg
@@ -219,6 +221,8 @@ public:
    explicit dst_reg(src_reg reg);
 
    int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
+
+   src_reg *reladdr;
 };
 
 class vec4_instruction : public exec_node {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index f90025c8e7e..8bd048ff459 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -37,6 +37,7 @@ src_reg::src_reg(dst_reg reg)
    this->reg = reg.reg;
    this->reg_offset = reg.reg_offset;
    this->type = reg.type;
+   this->reladdr = reg.reladdr;
 
    int swizzles[4];
    int next_chan = 0;
@@ -66,6 +67,7 @@ dst_reg::dst_reg(src_reg reg)
    this->reg_offset = reg.reg_offset;
    this->type = reg.type;
    this->writemask = WRITEMASK_XYZW;
+   this->reladdr = reg.reladdr;
 }
 
 vec4_instruction *
@@ -1186,7 +1188,6 @@ vec4_visitor::visit(ir_dereference_array *ir)
    if (constant_index) {
       src.reg_offset += constant_index->value.i[0] * element_size;
    } else {
-#if 0 /* Variable array index */
       /* Variable index array dereference.  It eats the "vec4" of the
        * base of the array and an index that offsets the Mesa register
        * index.
@@ -1198,15 +1199,22 @@ vec4_visitor::visit(ir_dereference_array *ir)
       if (element_size == 1) {
 	 index_reg = this->result;
       } else {
-	 index_reg = src_reg(this, glsl_type::float_type);
+	 index_reg = src_reg(this, glsl_type::int_type);
 
 	 emit(BRW_OPCODE_MUL, dst_reg(index_reg),
-	      this->result, src_reg_for_float(element_size));
+	      this->result, src_reg(element_size));
+      }
+
+      if (src.reladdr) {
+	 src_reg temp = src_reg(this, glsl_type::int_type);
+
+	 emit(BRW_OPCODE_ADD, dst_reg(temp), *src.reladdr, index_reg);
+
+	 index_reg = temp;
       }
 
       src.reladdr = ralloc(mem_ctx, src_reg);
       memcpy(src.reladdr, &index_reg, sizeof(index_reg));
-#endif
    }
 
    /* If the type is smaller than a vec4, replicate the last channel out. */

From 758c3c2b4588f235def48b2f28c0479a70f7c194 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 7 Aug 2011 15:21:25 -0700
Subject: [PATCH 382/600] i965/vs: Reserve MRF 14/15 for array loads/register
 unspilling.

---
 .../drivers/dri/i965/brw_vec4_visitor.cpp     | 20 +++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 8bd048ff459..e01318af1ab 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1708,10 +1708,18 @@ align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
 void
 vec4_visitor::emit_urb_writes()
 {
+   /* MRF 0 is reserved for the debugger, so start with message header
+    * in MRF 1.
+    */
    int base_mrf = 1;
    int mrf = base_mrf;
    int urb_entry_size;
    uint64_t outputs_remaining = c->prog_data.outputs_written;
+   /* In the process of generating our URB write message contents, we
+    * may need to unspill a register or load from an array.  Those
+    * reads would use MRFs 14-15.
+    */
+   int max_usable_mrf = 13;
 
    /* FINISHME: edgeflag */
 
@@ -1751,7 +1759,7 @@ vec4_visitor::emit_urb_writes()
        * even-numbered amount of URB write data, which will meet
        * gen6's requirements for length alignment.
        */
-      if (mrf == 16) {
+      if (mrf > max_usable_mrf) {
 	 attr++;
 	 break;
       }
@@ -1772,21 +1780,21 @@ vec4_visitor::emit_urb_writes()
 	 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
 	    continue;
 
-	 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
+	 assert(mrf < max_usable_mrf);
 
-	 assert(mrf != 16);
+	 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
       }
 
       inst = emit(VS_OPCODE_URB_WRITE);
       inst->base_mrf = base_mrf;
       inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
       inst->eot = true;
-      /* URB destination offset.  In the previous write, we got MRFs 2-
-       * 15 MRFs minus the one header MRF, so 14 regs.  URB offset is in
+      /* URB destination offset.  In the previous write, we got MRFs
+       * 2-13 minus the one header MRF, so 12 regs.  URB offset is in
        * URB row increments, and each of our MRFs is half of one of
        * those, since we're doing interleaved writes.
        */
-      inst->offset = 14 / 2;
+      inst->offset = (max_usable_mrf - base_mrf) / 2;
 
       urb_entry_size += mrf - base_mrf;
    }

From d0e4d71070cd7fa197ed98612782484ec1f27123 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 7 Aug 2011 12:15:26 -0700
Subject: [PATCH 383/600] i965/vs: Move virtual GRFs with array accesses to
 them to scratch space.

---
 src/mesa/drivers/dri/i965/brw_defines.h       |   2 +
 src/mesa/drivers/dri/i965/brw_vec4.h          |  12 ++
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp   |  10 +-
 .../drivers/dri/i965/brw_vec4_visitor.cpp     | 163 ++++++++++++++++++
 4 files changed, 186 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index e3823c65d1a..b740d87c933 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -640,6 +640,8 @@ enum opcode {
    FS_OPCODE_PULL_CONSTANT_LOAD,
 
    VS_OPCODE_URB_WRITE,
+   VS_OPCODE_SCRATCH_READ,
+   VS_OPCODE_SCRATCH_WRITE,
 };
 
 #define BRW_PREDICATE_NONE             0
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index bb40c71e4c9..2f171b72049 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -376,6 +376,7 @@ public:
    void setup_payload();
    void reg_allocate_trivial();
    void reg_allocate();
+   void move_grf_array_access_to_scratch();
 
    vec4_instruction *emit(enum opcode opcode);
 
@@ -424,6 +425,17 @@ public:
    int emit_vue_header_gen4(int header_mrf);
    void emit_urb_writes(void);
 
+   src_reg get_scratch_offset(vec4_instruction *inst,
+			      src_reg *reladdr, int reg_offset);
+   void emit_scratch_read(vec4_instruction *inst,
+			  dst_reg dst,
+			  src_reg orig_src,
+			  int base_offset);
+   void emit_scratch_write(vec4_instruction *inst,
+			   src_reg temp,
+			   dst_reg orig_dst,
+			   int base_offset);
+
    GLboolean try_emit_sat(ir_expression *ir);
 
    bool process_move_condition(ir_rvalue *ir);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index bc3110b0458..57eb467567e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -321,7 +321,7 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
 bool
 vec4_visitor::run()
 {
-   /* Generate FS IR for main().  (the visitor only descends into
+   /* Generate VS IR for main().  (the visitor only descends into
     * functions called "main").
     */
    foreach_iter(exec_list_iterator, iter, *shader->ir) {
@@ -332,6 +332,14 @@ vec4_visitor::run()
 
    emit_urb_writes();
 
+   /* Before any optimization, push array accesses out to scratch
+    * space where we need them to be.  This pass may allocate new
+    * virtual GRFs, so we want to do it early.  It also makes sure
+    * that we have reladdr computations available for CSE, since we'll
+    * often do repeated subexpressions for those.
+    */
+   move_grf_array_access_to_scratch();
+
    if (failed)
       return false;
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index e01318af1ab..049af6c3992 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1805,6 +1805,169 @@ vec4_visitor::emit_urb_writes()
       c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
 }
 
+src_reg
+vec4_visitor::get_scratch_offset(vec4_instruction *inst,
+				 src_reg *reladdr, int reg_offset)
+{
+   /* Because we store the values to scratch interleaved like our
+    * vertex data, we need to scale the vec4 index by 2.
+    */
+   int message_header_scale = 2;
+
+   /* Pre-gen6, the message header uses byte offsets instead of vec4
+    * (16-byte) offset units.
+    */
+   if (intel->gen < 6)
+      message_header_scale *= 16;
+
+   if (reladdr) {
+      src_reg index = src_reg(this, glsl_type::int_type);
+
+      vec4_instruction *add = emit(BRW_OPCODE_ADD,
+				   dst_reg(index),
+				   *reladdr,
+				   src_reg(reg_offset));
+      /* Move our new instruction from the tail to its correct place. */
+      add->remove();
+      inst->insert_before(add);
+
+      vec4_instruction *mul = emit(BRW_OPCODE_MUL, dst_reg(index),
+				   index, src_reg(message_header_scale));
+      mul->remove();
+      inst->insert_before(mul);
+
+      return index;
+   } else {
+      return src_reg(reg_offset * message_header_scale);
+   }
+}
+
+/**
+ * Emits an instruction before @inst to load the value named by @orig_src
+ * from scratch space at @base_offset to @temp.
+ */
+void
+vec4_visitor::emit_scratch_read(vec4_instruction *inst,
+				dst_reg temp, src_reg orig_src,
+				int base_offset)
+{
+   int reg_offset = base_offset + orig_src.reg_offset;
+   src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
+
+   vec4_instruction *scratch_read_inst = emit(VS_OPCODE_SCRATCH_READ,
+					      temp, index);
+
+   scratch_read_inst->base_mrf = 14;
+   scratch_read_inst->mlen = 1;
+   /* Move our instruction from the tail to its correct place. */
+   scratch_read_inst->remove();
+   inst->insert_before(scratch_read_inst);
+}
+
+/**
+ * Emits an instruction after @inst to store the value to be written
+ * to @orig_dst to scratch space at @base_offset, from @temp.
+ */
+void
+vec4_visitor::emit_scratch_write(vec4_instruction *inst,
+				 src_reg temp, dst_reg orig_dst,
+				 int base_offset)
+{
+   int reg_offset = base_offset + orig_dst.reg_offset;
+   src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);
+
+   dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
+				       orig_dst.writemask));
+   vec4_instruction *scratch_write_inst = emit(VS_OPCODE_SCRATCH_WRITE,
+					       dst, temp, index);
+   scratch_write_inst->base_mrf = 13;
+   scratch_write_inst->mlen = 2;
+   scratch_write_inst->predicate = inst->predicate;
+   /* Move our instruction from the tail to its correct place. */
+   scratch_write_inst->remove();
+   inst->insert_after(scratch_write_inst);
+}
+
+/**
+ * We can't generally support array access in GRF space, because a
+ * single instruction's destination can only span 2 contiguous
+ * registers.  So, we send all GRF arrays that get variable index
+ * access to scratch space.
+ */
+void
+vec4_visitor::move_grf_array_access_to_scratch()
+{
+   int scratch_loc[this->virtual_grf_count];
+
+   for (int i = 0; i < this->virtual_grf_count; i++) {
+      scratch_loc[i] = -1;
+   }
+
+   /* First, calculate the set of virtual GRFs that need to be punted
+    * to scratch due to having any array access on them, and where in
+    * scratch.
+    */
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      if (inst->dst.file == GRF && inst->dst.reladdr &&
+	  scratch_loc[inst->dst.reg] == -1) {
+	 scratch_loc[inst->dst.reg] = c->last_scratch;
+	 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
+      }
+
+      for (int i = 0 ; i < 3; i++) {
+	 src_reg *src = &inst->src[i];
+
+	 if (src->file == GRF && src->reladdr &&
+	     scratch_loc[src->reg] == -1) {
+	    scratch_loc[src->reg] = c->last_scratch;
+	    c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
+	 }
+      }
+   }
+
+   /* Now, for anything that will be accessed through scratch, rewrite
+    * it to load/store.  Note that this is a _safe list walk, because
+    * we may generate a new scratch_write instruction after the one
+    * we're processing.
+    */
+   foreach_list_safe(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      /* Set up the annotation tracking for new generated instructions. */
+      base_ir = inst->ir;
+      current_annotation = inst->annotation;
+
+      if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
+	 src_reg temp = src_reg(this, glsl_type::vec4_type);
+
+	 emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);
+
+	 inst->dst.file = temp.file;
+	 inst->dst.reg = temp.reg;
+	 inst->dst.reg_offset = temp.reg_offset;
+	 inst->dst.reladdr = NULL;
+      }
+
+      for (int i = 0 ; i < 3; i++) {
+	 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
+	    continue;
+
+	 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
+
+	 emit_scratch_read(inst, temp, inst->src[i],
+			   scratch_loc[inst->src[i].reg]);
+
+	 inst->src[i].file = temp.file;
+	 inst->src[i].reg = temp.reg;
+	 inst->src[i].reg_offset = temp.reg_offset;
+	 inst->src[i].reladdr = NULL;
+      }
+   }
+}
+
+
 vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
 			   struct gl_shader_program *prog,
 			   struct brw_shader *shader)

From 0f22f98ccd69bb5e8df3c78203bce9bc630965c1 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 7 Aug 2011 13:16:06 -0700
Subject: [PATCH 384/600] i965: Make some EU emit code for DP read/write
 messages non-static.

We keep building these strange interfaces for DP read/write where
there's a helper function with some partially-specific,
partially-general controls, which is used in exactly one place in code
generation.  Making these public will let us set up those instructions
in the one place they're to be generated.
---
 src/mesa/drivers/dri/i965/brw_eu.h      | 27 +++++++++++++++
 src/mesa/drivers/dri/i965/brw_eu_emit.c | 44 ++++++++++++-------------
 2 files changed, 49 insertions(+), 22 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 38dd99b693d..af50305fc2b 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -801,6 +801,12 @@ void brw_init_compile(struct brw_context *, struct brw_compile *p,
 		      void *mem_ctx);
 const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz );
 
+struct brw_instruction *brw_next_insn(struct brw_compile *p, GLuint opcode);
+void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
+		  struct brw_reg dest);
+void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
+		  struct brw_reg reg);
+
 
 /* Helpers for regular instructions:
  */
@@ -855,6 +861,27 @@ ROUND(RNDE)
 
 /* Helpers for SEND instruction:
  */
+void brw_set_dp_read_message(struct brw_compile *p,
+			     struct brw_instruction *insn,
+			     GLuint binding_table_index,
+			     GLuint msg_control,
+			     GLuint msg_type,
+			     GLuint target_cache,
+			     GLuint msg_length,
+			     GLuint response_length);
+
+void brw_set_dp_write_message(struct brw_compile *p,
+			      struct brw_instruction *insn,
+			      GLuint binding_table_index,
+			      GLuint msg_control,
+			      GLuint msg_type,
+			      GLuint msg_length,
+			      GLboolean header_present,
+			      GLuint pixel_scoreboard_clear,
+			      GLuint response_length,
+			      GLuint end_of_thread,
+			      GLuint send_commit_msg);
+
 void brw_urb_WRITE(struct brw_compile *p,
 		   struct brw_reg dest,
 		   GLuint msg_reg_nr,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index e7370f36064..b08906426e4 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -89,9 +89,9 @@ gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
 }
 
 
-static void brw_set_dest(struct brw_compile *p,
-			 struct brw_instruction *insn,
-			 struct brw_reg dest)
+void
+brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
+	     struct brw_reg dest)
 {
    if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
        dest.file != BRW_MESSAGE_REGISTER_FILE)
@@ -221,9 +221,9 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg)
    /* 10. Check destination issues. */
 }
 
-static void brw_set_src0(struct brw_compile *p,
-			 struct brw_instruction *insn,
-			 struct brw_reg reg)
+void
+brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
+	     struct brw_reg reg)
 {
    if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(reg.nr < 128);
@@ -504,17 +504,18 @@ static void brw_set_urb_message( struct brw_compile *p,
     }
 }
 
-static void brw_set_dp_write_message( struct brw_compile *p,
-				      struct brw_instruction *insn,
-				      GLuint binding_table_index,
-				      GLuint msg_control,
-				      GLuint msg_type,
-				      GLuint msg_length,
-				      GLboolean header_present,
-				      GLuint pixel_scoreboard_clear,
-				      GLuint response_length,
-				      GLuint end_of_thread,
-				      GLuint send_commit_msg)
+void
+brw_set_dp_write_message(struct brw_compile *p,
+			 struct brw_instruction *insn,
+			 GLuint binding_table_index,
+			 GLuint msg_control,
+			 GLuint msg_type,
+			 GLuint msg_length,
+			 GLboolean header_present,
+			 GLuint pixel_scoreboard_clear,
+			 GLuint response_length,
+			 GLuint end_of_thread,
+			 GLuint send_commit_msg)
 {
    struct brw_context *brw = p->brw;
    struct intel_context *intel = &brw->intel;
@@ -570,7 +571,7 @@ static void brw_set_dp_write_message( struct brw_compile *p,
    }
 }
 
-static void
+void
 brw_set_dp_read_message(struct brw_compile *p,
 			struct brw_instruction *insn,
 			GLuint binding_table_index,
@@ -709,9 +710,9 @@ static void brw_set_sampler_message(struct brw_compile *p,
 }
 
 
-
-static struct brw_instruction *next_insn( struct brw_compile *p, 
-					  GLuint opcode )
+#define next_insn brw_next_insn
+struct brw_instruction *
+brw_next_insn(struct brw_compile *p, GLuint opcode)
 {
    struct brw_instruction *insn;
 
@@ -732,7 +733,6 @@ static struct brw_instruction *next_insn( struct brw_compile *p,
    return insn;
 }
 
-
 static struct brw_instruction *brw_alu1( struct brw_compile *p,
 					 GLuint opcode,
 					 struct brw_reg dest,

From 584ff407482fd3baf5ce081dbbf9653eb76c40f1 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 7 Aug 2011 13:36:11 -0700
Subject: [PATCH 385/600] i965/vs: Add support for scratch read/write codegen.

---
 src/mesa/drivers/dri/i965/brw_vec4.h        |   9 ++
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 144 +++++++++++++++++++-
 2 files changed, 151 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 2f171b72049..b5f442e6d21 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -451,6 +451,15 @@ public:
 			    struct brw_reg dst,
 			    struct brw_reg src);
    void generate_urb_write(vec4_instruction *inst);
+   void generate_oword_dual_block_offsets(struct brw_reg m1,
+					  struct brw_reg index);
+   void generate_scratch_write(vec4_instruction *inst,
+			       struct brw_reg dst,
+			       struct brw_reg src,
+			       struct brw_reg index);
+   void generate_scratch_read(vec4_instruction *inst,
+			      struct brw_reg dst,
+			      struct brw_reg index);
 };
 
 } /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 57eb467567e..21830f99fc2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -1,5 +1,4 @@
-/*
- * Copyright © 2011 Intel Corporation
+/* Copyright © 2011 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -278,6 +277,139 @@ vec4_visitor::generate_urb_write(vec4_instruction *inst)
 		 BRW_URB_SWIZZLE_INTERLEAVE);
 }
 
+void
+vec4_visitor::generate_oword_dual_block_offsets(struct brw_reg m1,
+						struct brw_reg index)
+{
+   int second_vertex_offset;
+
+   if (intel->gen >= 6)
+      second_vertex_offset = 1;
+   else
+      second_vertex_offset = 16;
+
+   m1 = retype(m1, BRW_REGISTER_TYPE_D);
+
+   /* Set up M1 (message payload).  Only the block offsets in M1.0 and
+    * M1.4 are used, and the rest are ignored.
+    */
+   struct brw_reg m1_0 = suboffset(vec1(m1), 0);
+   struct brw_reg m1_4 = suboffset(vec1(m1), 4);
+   struct brw_reg index_0 = suboffset(vec1(index), 0);
+   struct brw_reg index_4 = suboffset(vec1(index), 4);
+
+   brw_push_insn_state(p);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_set_access_mode(p, BRW_ALIGN_1);
+
+   brw_MOV(p, m1_0, index_0);
+
+   brw_set_predicate_inverse(p, true);
+   if (index.file == BRW_IMMEDIATE_VALUE) {
+      index_4.dw1.ud++;
+      brw_MOV(p, m1_4, index_4);
+   } else {
+      brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset));
+   }
+
+   brw_pop_insn_state(p);
+}
+
+void
+vec4_visitor::generate_scratch_read(vec4_instruction *inst,
+				    struct brw_reg dst,
+				    struct brw_reg index)
+{
+   if (intel->gen >= 6) {
+      brw_push_insn_state(p);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+      brw_MOV(p,
+	      retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D),
+	      retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D));
+      brw_pop_insn_state(p);
+   }
+
+   generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
+				     index);
+
+   uint32_t msg_type;
+
+   if (intel->gen >= 6)
+      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+   else if (intel->gen == 5 || intel->is_g4x)
+      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+   else
+      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+
+   /* Each of the 8 channel enables is considered for whether each
+    * dword is written.
+    */
+   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
+   brw_set_dest(p, send, dst);
+   brw_set_src0(p, send, brw_message_reg(inst->base_mrf));
+   brw_set_dp_read_message(p, send,
+			   255, /* binding table index: stateless access */
+			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
+			   msg_type,
+			   BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
+			   2, /* mlen */
+			   1 /* rlen */);
+}
+
+void
+vec4_visitor::generate_scratch_write(vec4_instruction *inst,
+				     struct brw_reg dst,
+				     struct brw_reg src,
+				     struct brw_reg index)
+{
+   /* If the instruction is predicated, we'll predicate the send, not
+    * the header setup.
+    */
+   brw_set_predicate_control(p, false);
+
+   if (intel->gen >= 6) {
+      brw_push_insn_state(p);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+      brw_MOV(p,
+	      retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D),
+	      retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D));
+      brw_pop_insn_state(p);
+   }
+
+   generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
+				     index);
+
+   brw_MOV(p,
+	   retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D),
+	   retype(src, BRW_REGISTER_TYPE_D));
+
+   uint32_t msg_type;
+
+   if (intel->gen >= 6)
+      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
+   else
+      msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
+
+   brw_set_predicate_control(p, inst->predicate);
+
+   /* Each of the 8 channel enables is considered for whether each
+    * dword is written.
+    */
+   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
+   brw_set_dest(p, send, dst);
+   brw_set_src0(p, send, brw_message_reg(inst->base_mrf));
+   brw_set_dp_write_message(p, send,
+			    255, /* binding table index: stateless access */
+			    BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
+			    msg_type,
+			    3, /* mlen */
+			    true, /* header present */
+			    false, /* pixel scoreboard */
+			    0, /* rlen */
+			    false, /* eot */
+			    false /* commit */);
+}
+
 void
 vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
 				      struct brw_reg dst,
@@ -308,6 +440,14 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
       generate_urb_write(inst);
       break;
 
+   case VS_OPCODE_SCRATCH_READ:
+      generate_scratch_read(inst, dst, src[0]);
+      break;
+
+   case VS_OPCODE_SCRATCH_WRITE:
+      generate_scratch_write(inst, dst, src[0], src[1]);
+      break;
+
    default:
       if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
 	 fail("unsupported opcode in `%s' in VS\n",

From 54fa706d6f06955221cb6b452b5b170bfaaceef4 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 7 Aug 2011 13:38:50 -0700
Subject: [PATCH 386/600] i965/vs: Enable variable array indexing in the VS.

---
 src/mesa/drivers/dri/i965/brw_shader.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 2eeeec25cac..2dc32c95610 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -111,12 +111,14 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
       brw_do_cubemap_normalize(shader->ir);
       lower_noise(shader->ir);
       lower_quadop_vector(shader->ir, false);
+
+      bool input = true;
+      bool output = stage == MESA_SHADER_FRAGMENT;
+      bool temp = stage == MESA_SHADER_FRAGMENT;
+      bool uniform = true;
+
       lower_variable_index_to_cond_assign(shader->ir,
-					  GL_TRUE, /* input */
-					  GL_TRUE, /* output */
-					  GL_TRUE, /* temp */
-					  GL_TRUE /* uniform */
-					  );
+					  input, output, temp, uniform);
 
       do {
 	 progress = false;

From e94bdbe04a4f0adb73ab92153987f0c9f48814f7 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Sun, 7 Aug 2011 17:09:12 -0700
Subject: [PATCH 387/600] i965: Add gen6 disassembly for DP render cache
 messages.

---
 src/mesa/drivers/dri/i965/brw_disasm.c | 49 ++++++++++++++++++++++++--
 1 file changed, 46 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index af41c848308..927b0b4acc9 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -309,6 +309,35 @@ char *target_function[16] = {
     [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner"
 };
 
+char *target_function_gen6[16] = {
+    [BRW_MESSAGE_TARGET_NULL] = "null",
+    [BRW_MESSAGE_TARGET_MATH] = "math",
+    [BRW_MESSAGE_TARGET_SAMPLER] = "sampler",
+    [BRW_MESSAGE_TARGET_GATEWAY] = "gateway",
+    [GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE] = "sampler",
+    [GEN6_MESSAGE_TARGET_DP_RENDER_CACHE] = "render",
+    [GEN6_MESSAGE_TARGET_DP_CONST_CACHE] = "const",
+    [BRW_MESSAGE_TARGET_URB] = "urb",
+    [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner"
+};
+
+char *dp_rc_msg_type_gen6[16] = {
+    [BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read",
+    [GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read",
+    [GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read",
+    [GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read",
+    [GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read",
+    [GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read",
+    [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORMc write",
+};
+
 char *math_function[16] = {
     [BRW_MATH_FUNCTION_INV] = "inv",
     [BRW_MATH_FUNCTION_LOG] = "log",
@@ -927,8 +956,14 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
 	newline (file);
 	pad (file, 16);
 	space = 0;
-	err |= control (file, "target function", target_function,
-			target, &space);
+
+	if (gen >= 6) {
+	   err |= control (file, "target function", target_function_gen6,
+			   target, &space);
+	} else {
+	   err |= control (file, "target function", target_function,
+			   target, &space);
+	}
 
 	switch (target) {
 	case BRW_MESSAGE_TARGET_MATH:
@@ -985,9 +1020,16 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
 			inst->bits3.dp_read.msg_type);
 	    }
 	    break;
+
 	case BRW_MESSAGE_TARGET_DATAPORT_WRITE:
 	    if (gen >= 6) {
-		format (file, " (%d, %d, %d, %d, %d, %d)",
+		format (file, " (");
+
+		err |= control (file, "DP rc message type",
+				dp_rc_msg_type_gen6,
+				inst->bits3.gen6_dp.msg_type, &space);
+
+		format (file, ", %d, %d, %d, %d, %d, %d)",
 			inst->bits3.gen6_dp.binding_table_index,
 			inst->bits3.gen6_dp.msg_control,
 			inst->bits3.gen6_dp.msg_type,
@@ -1003,6 +1045,7 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
 			inst->bits3.dp_write.send_commit_msg);
 	    }
 	    break;
+
 	case BRW_MESSAGE_TARGET_URB:
 	    if (gen >= 5) {
 		format (file, " %d", inst->bits3.urb_gen5.offset);

From 7b91eefe7cbe771397684b5970f7c04313baa2f0 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 8 Aug 2011 15:56:11 -0700
Subject: [PATCH 388/600] i965/vs: Slightly improve the trivial reg allocator
 to skip unused regs.

This fixes most of the regressions in the vs array test set from the
varying array indexing work, since the giant array that was originally
allocated in virtual GRF space never gets used and is only ever
read/stored from scratch space.
---
 .../dri/i965/brw_vec4_reg_allocate.cpp        | 26 +++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index e7f6b28a536..1bfd84d76e8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -41,15 +41,37 @@ vec4_visitor::reg_allocate_trivial()
 {
    int last_grf = 0;
    int hw_reg_mapping[this->virtual_grf_count];
+   bool virtual_grf_used[this->virtual_grf_count];
    int i;
    int next;
 
+   /* Calculate which virtual GRFs are actually in use after whatever
+    * optimization passes have occurred.
+    */
+   for (int i = 0; i < this->virtual_grf_count; i++) {
+      virtual_grf_used[i] = false;
+   }
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)iter.get();
+
+      if (inst->dst.file == GRF)
+	 virtual_grf_used[inst->dst.reg] = true;
+
+      for (int i = 0; i < 3; i++) {
+	 if (inst->src[i].file == GRF)
+	    virtual_grf_used[inst->src[i].reg] = true;
+      }
+   }
+
    /* Note that compressed instructions require alignment to 2 registers. */
    hw_reg_mapping[0] = this->first_non_payload_grf;
    next = hw_reg_mapping[0] + this->virtual_grf_sizes[0];
    for (i = 1; i < this->virtual_grf_count; i++) {
-      hw_reg_mapping[i] = next;
-      next += this->virtual_grf_sizes[i];
+      if (virtual_grf_used[i]) {
+	 hw_reg_mapping[i] = next;
+	 next += this->virtual_grf_sizes[i];
+      }
    }
    prog_data->total_grf = next;
 

From 6408b0295f5c8be6fea891a025d79752484721b6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 9 Aug 2011 10:57:09 -0700
Subject: [PATCH 389/600] i965/vs: Fix implementation of ir_unop_any.

We were inheriting whatever previous predicate existed.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 049af6c3992..fde1d67759a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -998,7 +998,9 @@ vec4_visitor::visit(ir_expression *ir)
       break;
 
    case ir_unop_any:
-      emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
+      inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
       emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
 
       inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));

From 250770b74d33bb8625c780a74a89477af033d13a Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 9 Aug 2011 11:00:28 -0700
Subject: [PATCH 390/600] i965/vs: Respect the gen6 limitation that math
 opcodes can't be align16.

Fixes vs-acos-vec3 and friends.
---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp   |  9 +++++++
 .../drivers/dri/i965/brw_vec4_visitor.cpp     | 26 +++++++++++++++++--
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 21830f99fc2..effc82a8004 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -250,6 +250,14 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
 				  struct brw_reg dst,
 				  struct brw_reg src)
 {
+   /* Can't do writemask because math can't be align16. */
+   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
+   /* Source swizzles are ignored. */
+   assert(!src.abs);
+   assert(!src.negate);
+   assert(src.dw1.bits.swizzle = BRW_SWIZZLE_XYZW);
+
+   brw_set_access_mode(p, BRW_ALIGN_1);
    brw_math(p,
 	    dst,
 	    brw_math_function(inst->opcode),
@@ -258,6 +266,7 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
 	    src,
 	    BRW_MATH_DATA_SCALAR,
 	    BRW_MATH_PRECISION_FULL);
+   brw_set_access_mode(p, BRW_ALIGN_16);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index fde1d67759a..f4756a9a1a8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -129,7 +129,18 @@ vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
    src_reg temp_src = src_reg(this, glsl_type::vec4_type);
    emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);
 
-   emit(opcode, dst, temp_src);
+   if (dst.writemask != WRITEMASK_XYZW) {
+      /* The gen6 math instruction must be align1, so we can't do
+       * writemasks.
+       */
+      dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
+
+      emit(opcode, temp_dst, temp_src);
+
+      emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
+   } else {
+      emit(opcode, dst, temp_src);
+   }
 }
 
 void
@@ -184,7 +195,18 @@ vec4_visitor::emit_math2_gen6(enum opcode opcode,
    emit(BRW_OPCODE_MOV, dst, src1);
    src1 = expanded;
 
-   emit(opcode, dst, src0, src1);
+   if (dst.writemask != WRITEMASK_XYZW) {
+      /* The gen6 math instruction must be align1, so we can't do
+       * writemasks.
+       */
+      dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
+
+      emit(opcode, temp_dst, src0, src1);
+
+      emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
+   } else {
+      emit(opcode, dst, src0, src1);
+   }
 }
 
 void

From abf843a797876b5e3c5c91dbec25b6553d2cc281 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 9 Aug 2011 12:30:41 -0700
Subject: [PATCH 391/600] i965/vs: Add support for ir_binop_pow.

Fixes vs-pow-float-float.
---
 src/mesa/drivers/dri/i965/brw_vec4.h          | 10 +++
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp   | 63 +++++++++++++++++--
 .../drivers/dri/i965/brw_vec4_visitor.cpp     |  4 +-
 3 files changed, 70 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index b5f442e6d21..082021513d2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -444,12 +444,22 @@ public:
    void generate_vs_instruction(vec4_instruction *inst,
 				struct brw_reg dst,
 				struct brw_reg *src);
+
    void generate_math1_gen4(vec4_instruction *inst,
 			    struct brw_reg dst,
 			    struct brw_reg src);
    void generate_math1_gen6(vec4_instruction *inst,
 			    struct brw_reg dst,
 			    struct brw_reg src);
+   void generate_math2_gen4(vec4_instruction *inst,
+			    struct brw_reg dst,
+			    struct brw_reg src0,
+			    struct brw_reg src1);
+   void generate_math2_gen6(vec4_instruction *inst,
+			    struct brw_reg dst,
+			    struct brw_reg src0,
+			    struct brw_reg src1);
+
    void generate_urb_write(vec4_instruction *inst);
    void generate_oword_dual_block_offsets(struct brw_reg m1,
 					  struct brw_reg index);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index effc82a8004..df9521cd04e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -245,6 +245,15 @@ vec4_visitor::generate_math1_gen4(vec4_instruction *inst,
 	    BRW_MATH_PRECISION_FULL);
 }
 
+static void
+check_gen6_math_src_arg(struct brw_reg src)
+{
+   /* Source swizzles are ignored. */
+   assert(!src.abs);
+   assert(!src.negate);
+   assert(src.dw1.bits.swizzle = BRW_SWIZZLE_XYZW);
+}
+
 void
 vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
 				  struct brw_reg dst,
@@ -252,10 +261,7 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
 {
    /* Can't do writemask because math can't be align16. */
    assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
-   /* Source swizzles are ignored. */
-   assert(!src.abs);
-   assert(!src.negate);
-   assert(src.dw1.bits.swizzle = BRW_SWIZZLE_XYZW);
+   check_gen6_math_src_arg(src);
 
    brw_set_access_mode(p, BRW_ALIGN_1);
    brw_math(p,
@@ -269,6 +275,49 @@ vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
    brw_set_access_mode(p, BRW_ALIGN_16);
 }
 
+void
+vec4_visitor::generate_math2_gen6(vec4_instruction *inst,
+				  struct brw_reg dst,
+				  struct brw_reg src0,
+				  struct brw_reg src1)
+{
+   /* Can't do writemask because math can't be align16. */
+   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
+   /* Source swizzles are ignored. */
+   check_gen6_math_src_arg(src0);
+   check_gen6_math_src_arg(src1);
+
+   brw_set_access_mode(p, BRW_ALIGN_1);
+   brw_math2(p,
+	     dst,
+	     brw_math_function(inst->opcode),
+	     src0, src1);
+   brw_set_access_mode(p, BRW_ALIGN_16);
+}
+
+void
+vec4_visitor::generate_math2_gen4(vec4_instruction *inst,
+				  struct brw_reg dst,
+				  struct brw_reg src0,
+				  struct brw_reg src1)
+{
+   /* Can't do writemask because math can't be align16. */
+   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
+
+   brw_MOV(p, brw_message_reg(inst->base_mrf + 1), src1);
+
+   brw_set_access_mode(p, BRW_ALIGN_1);
+   brw_math(p,
+	    dst,
+	    brw_math_function(inst->opcode),
+	    BRW_MATH_SATURATE_NONE,
+	    inst->base_mrf,
+	    src0,
+	    BRW_MATH_DATA_VECTOR,
+	    BRW_MATH_PRECISION_FULL);
+   brw_set_access_mode(p, BRW_ALIGN_16);
+}
+
 void
 vec4_visitor::generate_urb_write(vec4_instruction *inst)
 {
@@ -442,7 +491,11 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
       break;
 
    case SHADER_OPCODE_POW:
-      assert(!"finishme");
+      if (intel->gen >= 6) {
+	 generate_math2_gen6(inst, dst, src[0], src[1]);
+      } else {
+	 generate_math2_gen4(inst, dst, src[0], src[1]);
+      }
       break;
 
    case VS_OPCODE_URB_WRITE:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index f4756a9a1a8..f9447d7c391 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -188,11 +188,11 @@ vec4_visitor::emit_math2_gen6(enum opcode opcode,
     */
 
    expanded = src_reg(this, glsl_type::vec4_type);
-   emit(BRW_OPCODE_MOV, dst, src0);
+   emit(BRW_OPCODE_MOV, dst_reg(expanded), src0);
    src0 = expanded;
 
    expanded = src_reg(this, glsl_type::vec4_type);
-   emit(BRW_OPCODE_MOV, dst, src1);
+   emit(BRW_OPCODE_MOV, dst_reg(expanded), src1);
    src1 = expanded;
 
    if (dst.writemask != WRITEMASK_XYZW) {

From 0b359e3ea015576d0e75bf5ec19aceef337311a3 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 9 Aug 2011 14:35:38 -0700
Subject: [PATCH 392/600] i965/vs: Add support for loops.

This is copied from brw_fs.cpp, instead of doing the temporary IR
generation that ir_to_mesa does.  Fixes glsl-vs-loop and friends.
---
 .../drivers/dri/i965/brw_vec4_visitor.cpp     | 53 ++++++++-----------
 1 file changed, 21 insertions(+), 32 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index f9447d7c391..e11ec40cc7b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -700,58 +700,47 @@ vec4_visitor::visit(ir_variable *ir)
 void
 vec4_visitor::visit(ir_loop *ir)
 {
-   ir_dereference_variable *counter = NULL;
-
-   fail("not yet\n");
+   dst_reg counter;
 
    /* We don't want debugging output to print the whole body of the
     * loop as the annotation.
     */
    this->base_ir = NULL;
 
-   if (ir->counter != NULL)
-      counter = new(ir) ir_dereference_variable(ir->counter);
+   if (ir->counter != NULL) {
+      this->base_ir = ir->counter;
+      ir->counter->accept(this);
+      counter = *(variable_storage(ir->counter));
 
-   if (ir->from != NULL) {
-      assert(ir->counter != NULL);
+      if (ir->from != NULL) {
+	 this->base_ir = ir->from;
+	 ir->from->accept(this);
 
-      ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
-
-      a->accept(this);
-      delete a;
+	 emit(BRW_OPCODE_MOV, counter, this->result);
+      }
    }
 
    emit(BRW_OPCODE_DO);
 
    if (ir->to) {
-      ir_expression *e =
-	 new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
-			       counter, ir->to);
-      ir_if *if_stmt =  new(ir) ir_if(e);
+      this->base_ir = ir->to;
+      ir->to->accept(this);
 
-      ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
+      vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst_null_d(),
+				    src_reg(counter), this->result);
+      inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);
 
-      if_stmt->then_instructions.push_tail(brk);
-
-      if_stmt->accept(this);
-
-      delete if_stmt;
-      delete e;
-      delete brk;
+      inst = emit(BRW_OPCODE_BREAK);
+      inst->predicate = BRW_PREDICATE_NORMAL;
    }
 
    visit_instructions(&ir->body_instructions);
 
+
    if (ir->increment) {
-      ir_expression *e =
-	 new(ir) ir_expression(ir_binop_add, counter->type,
-			       counter, ir->increment);
-
-      ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
-
-      a->accept(this);
-      delete a;
-      delete e;
+      this->base_ir = ir->increment;
+      ir->increment->accept(this);
+      emit(BRW_OPCODE_ADD, counter, src_reg(counter), this->result);
    }
 
    emit(BRW_OPCODE_WHILE);

From fea7d34b3545878ce00914f388e1eeebf55f7748 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 9 Aug 2011 14:49:29 -0700
Subject: [PATCH 393/600] i965/vs: Fix builtin uniform setup.

I want to intelligently pack them at some point, but for now we have
the params set up in groups of 4.  Fixes glsl-vs-normalscale.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index e11ec40cc7b..93252f73285 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -443,13 +443,12 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
       int last_swiz = -1;
       for (unsigned int j = 0; j < 4; j++) {
 	 int swiz = GET_SWZ(slots[i].swizzle, j);
-	 if (swiz == last_swiz)
-	    break;
 	 last_swiz = swiz;
 
 	 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
 	 c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT;
-	 this->uniform_size[this->uniforms]++;
+	 if (swiz <= last_swiz)
+	    this->uniform_size[this->uniforms]++;
       }
       this->uniforms++;
    }

From a55fbbc1a2b579aed1e80036367b521ef6928f66 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 9 Aug 2011 15:08:47 -0700
Subject: [PATCH 394/600] i965/vs: Fix access of attribute arrays.

By leaving out the column index, we were reading an unallocated
attribute on glsl-mat-attribute.
---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index df9521cd04e..517a3e3c75b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -52,8 +52,9 @@ vec4_visitor::setup_attributes(int payload_reg)
 	 if (inst->src[i].file != ATTR)
 	    continue;
 
+	 int grf = attribute_map[inst->src[i].reg + inst->src[i].reg_offset];
 	 inst->src[i].file = HW_REG;
-	 inst->src[i].fixed_hw_reg = brw_vec8_grf(attribute_map[inst->src[i].reg], 0);
+	 inst->src[i].fixed_hw_reg = brw_vec8_grf(grf, 0);
 	 inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle;
       }
    }

From aed5e353e95f47773864c6e61c506b9ddad0e2e9 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 9 Aug 2011 15:19:26 -0700
Subject: [PATCH 395/600] i965/vs: Clamp vertex color outputs when required by
 ARB_color_buffer_float.

Fixes glsl-vs-vertex-color.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 93252f73285..2a1f003b5ce 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1764,7 +1764,16 @@ vec4_visitor::emit_urb_writes()
       if (attr == VERT_RESULT_PSIZ)
 	 continue;
 
-      emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
+      vec4_instruction *inst = emit(BRW_OPCODE_MOV, brw_message_reg(mrf++),
+				    src_reg(output_reg[attr]));
+
+      if ((attr == VERT_RESULT_COL0 ||
+	   attr == VERT_RESULT_COL1 ||
+	   attr == VERT_RESULT_BFC0 ||
+	   attr == VERT_RESULT_BFC1) &&
+	  c->key.clamp_vertex_color) {
+	 inst->saturate = true;
+      }
 
       /* If this was MRF 15, we can't fit anything more into this URB
        * WRITE.  Note that base_mrf of 1 means that MRF 15 is an

From 072d64121e13ad6bcb9b703090de1ee4a59f7096 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 10 Aug 2011 11:38:42 -0700
Subject: [PATCH 396/600] i965/vs: Add support for GL_FIXED attributes.

Fixes arb_es2_compatibility-fixed-type
---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 517a3e3c75b..350d544aba3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -42,6 +42,18 @@ vec4_visitor::setup_attributes(int payload_reg)
       if (prog_data->inputs_read & BITFIELD64_BIT(i)) {
 	 attribute_map[i] = payload_reg + nr_attributes;
 	 nr_attributes++;
+
+	 /* Do GL_FIXED rescaling for GLES2.0.  Our GL_FIXED
+	  * attributes come in as floating point conversions of the
+	  * integer values.
+	  */
+	 if (c->key.gl_fixed_input_size[i] != 0) {
+	    struct brw_reg reg = brw_vec8_grf(attribute_map[i], 0);
+
+	    brw_MUL(p,
+		    brw_writemask(reg, (1 << c->key.gl_fixed_input_size[i]) - 1),
+		    reg, brw_imm_f(1.0 / 65536.0));
+	 }
       }
    }
 

From 193a9a209d5121e2c20f1d20c61587b1e3d0603d Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 10 Aug 2011 14:13:23 -0700
Subject: [PATCH 397/600] i965/vs: Add support for if(any(bvec)) on gen6.

---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 2a1f003b5ce..d1888579597 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -564,10 +564,6 @@ vec4_visitor::emit_if_gen6(ir_if *ir)
 
       assert(expr->get_num_operands() <= 2);
       for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
-	 assert(expr->operands[i]->type->is_scalar() ||
-		expr->operation == ir_binop_any_nequal ||
-		expr->operation == ir_binop_all_equal);
-
 	 expr->operands[i]->accept(this);
 	 op[i] = this->result;
       }
@@ -634,6 +630,14 @@ vec4_visitor::emit_if_gen6(ir_if *ir)
 	 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
 	 return;
 
+      case ir_unop_any:
+	 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
+	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+	 inst = emit(BRW_OPCODE_IF);
+	 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+	 return;
+
       default:
 	 assert(!"not reached");
 	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));

From e8980c61b2932cd4c8791fcc5afdb54fa033c224 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 11 Aug 2011 09:17:18 -0700
Subject: [PATCH 398/600] i965/vs: Fix the trivial register allocator's failure
 path.

---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp         | 3 +++
 src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp | 5 ++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 350d544aba3..27160fb40d4 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -561,6 +561,9 @@ vec4_visitor::run()
    setup_payload();
    reg_allocate();
 
+   if (failed)
+      return false;
+
    brw_set_access_mode(p, BRW_ALIGN_16);
 
    generate_code();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index 1bfd84d76e8..d5fd21d99a4 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -39,7 +39,6 @@ assign(int *reg_hw_locations, reg *reg)
 void
 vec4_visitor::reg_allocate_trivial()
 {
-   int last_grf = 0;
    int hw_reg_mapping[this->virtual_grf_count];
    bool virtual_grf_used[this->virtual_grf_count];
    int i;
@@ -84,9 +83,9 @@ vec4_visitor::reg_allocate_trivial()
       assign(hw_reg_mapping, &inst->src[2]);
    }
 
-   if (last_grf >= BRW_MAX_GRF) {
+   if (prog_data->total_grf > BRW_MAX_GRF) {
       fail("Ran out of regs on trivial allocator (%d/%d)\n",
-	   last_grf, BRW_MAX_GRF);
+	   prog_data->total_grf, BRW_MAX_GRF);
    }
 }
 

From d376fa8e84b044ead47586d1b56a10742bcbdac7 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 15 Aug 2011 18:40:14 -0700
Subject: [PATCH 399/600] i965: Fix assertion failure on a loop consisting of
 while (true) { break }.

On enabling the precompile step in the VS, we tripped over this
assertion failure in glsl-link-bug-30552.
---
 src/mesa/drivers/dri/i965/brw_eu_emit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index b08906426e4..f5cc09dd49b 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2311,7 +2311,7 @@ brw_find_loop_end(struct brw_compile *p, int start)
       if (insn->header.opcode == BRW_OPCODE_WHILE) {
 	 int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
 				   : insn->bits3.break_cont.jip;
-	 if (ip + jip / br < start)
+	 if (ip + jip / br <= start)
 	    return ip;
       }
    }

From 7fbe7fe13359d3f349664410ec73d7bd48824ed6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 11 Aug 2011 09:52:08 -0700
Subject: [PATCH 400/600] i965/vs: Run the shader backend at link time and
 return compile failures.

Link failure is something that shouldn't happen, but we sometimes want
it during development.  The precompile also allows analysis of shader
codegen with shader-db.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp          |  2 +-
 src/mesa/drivers/dri/i965/brw_shader.cpp      |  4 ++
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp   | 12 ++---
 .../drivers/dri/i965/brw_vec4_visitor.cpp     |  2 +-
 src/mesa/drivers/dri/i965/brw_vs.c            | 51 ++++++++++++++++---
 src/mesa/drivers/dri/i965/brw_vs.h            |  3 +-
 6 files changed, 54 insertions(+), 20 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 693ef0ce31a..b19c6e72fa6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1781,7 +1781,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
    fs_visitor v(c, prog, shader);
    if (!v.run()) {
       prog->LinkStatus = GL_FALSE;
-      prog->InfoLog = ralloc_strdup(prog, v.fail_msg);
+      ralloc_strcat(&prog->InfoLog, v.fail_msg);
 
       return false;
    }
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 2dc32c95610..3ff6bbaed47 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -24,6 +24,7 @@
 extern "C" {
 #include "main/macros.h"
 #include "brw_context.h"
+#include "brw_vs.h"
 }
 #include "brw_fs.h"
 #include "../glsl/ir_optimization.h"
@@ -67,6 +68,9 @@ brw_shader_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
    if (!brw_fs_precompile(ctx, prog))
       return false;
 
+   if (!brw_vs_precompile(ctx, prog))
+      return false;
+
    return true;
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 27160fb40d4..9ef6ab6de90 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -809,14 +809,8 @@ vec4_visitor::generate_code()
 extern "C" {
 
 bool
-brw_vs_emit(struct brw_vs_compile *c)
+brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c)
 {
-   struct brw_compile *p = &c->func;
-   struct brw_context *brw = p->brw;
-   struct intel_context *intel = &brw->intel;
-   struct gl_context *ctx = &intel->ctx;
-   struct gl_shader_program *prog = ctx->Shader.CurrentVertexProgram;
-
    if (!prog)
       return false;
 
@@ -833,8 +827,8 @@ brw_vs_emit(struct brw_vs_compile *c)
 
    vec4_visitor v(c, prog, shader);
    if (!v.run()) {
-      /* FINISHME: Cleanly fail, test at link time, etc. */
-      assert(!"not reached");
+      prog->LinkStatus = GL_FALSE;
+      ralloc_strcat(&prog->InfoLog, v.fail_msg);
       return false;
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index d1888579597..b1792a8ee16 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2012,7 +2012,7 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
    this->current_annotation = NULL;
 
    this->c = c;
-   this->vp = brw->vertex_program; /* FINISHME: change for precompile */
+   this->vp = prog->VertexProgram;
    this->prog_data = &c->prog_data;
 
    this->variable_ht = hash_table_ctor(0,
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index d389f602fba..3373e707d98 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -40,9 +40,11 @@
 
 #include "../glsl/ralloc.h"
 
-static void do_vs_prog( struct brw_context *brw, 
-			struct brw_vertex_program *vp,
-			struct brw_vs_prog_key *key )
+static bool
+do_vs_prog(struct brw_context *brw,
+	   struct gl_shader_program *prog,
+	   struct brw_vertex_program *vp,
+	   struct brw_vs_prog_key *key)
 {
    struct gl_context *ctx = &brw->intel.ctx;
    struct intel_context *intel = &brw->intel;
@@ -91,9 +93,11 @@ static void do_vs_prog( struct brw_context *brw,
    if (new_vs == -1)
       new_vs = getenv("INTEL_NEW_VS") != NULL;
 
-   if (new_vs) {
-      if (!brw_vs_emit(&c))
-	 brw_old_vs_emit(&c);
+   if (new_vs && prog) {
+      if (!brw_vs_emit(prog, &c)) {
+	 ralloc_free(mem_ctx);
+	 return false;
+      }
    } else {
       brw_old_vs_emit(&c);
    }
@@ -130,6 +134,8 @@ static void do_vs_prog( struct brw_context *brw,
 		    &c.prog_data, aux_size,
 		    &brw->vs.prog_offset, &brw->vs.prog_data);
    ralloc_free(mem_ctx);
+
+   return true;
 }
 
 
@@ -174,13 +180,15 @@ static void brw_upload_vs_prog(struct brw_context *brw)
    if (!brw_search_cache(&brw->cache, BRW_VS_PROG,
 			 &key, sizeof(key),
 			 &brw->vs.prog_offset, &brw->vs.prog_data)) {
-      do_vs_prog(brw, vp, &key);
+      bool success = do_vs_prog(brw, ctx->Shader.CurrentVertexProgram,
+				vp, &key);
+
+      assert(success);
    }
    brw->vs.constant_map = ((int8_t *)brw->vs.prog_data +
 			   sizeof(*brw->vs.prog_data));
 }
 
-
 /* See brw_vs.c:
  */
 const struct brw_tracked_state brw_vs_prog = {
@@ -193,3 +201,30 @@ const struct brw_tracked_state brw_vs_prog = {
    },
    .prepare = brw_upload_vs_prog
 };
+
+bool
+brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_vs_prog_key key;
+   struct gl_vertex_program *vp = prog->VertexProgram;
+   struct brw_vertex_program *bvp = brw_vertex_program(vp);
+   uint32_t old_prog_offset = brw->vs.prog_offset;
+   struct brw_vs_prog_data *old_prog_data = brw->vs.prog_data;
+   bool success;
+
+   if (!vp)
+      return true;
+
+   memset(&key, 0, sizeof(key));
+
+   key.program_string_id = bvp->id;
+   key.clamp_vertex_color = true;
+
+   success = do_vs_prog(brw, prog, bvp, &key);
+
+   brw->vs.prog_offset = old_prog_offset;
+   brw->vs.prog_data = old_prog_data;
+
+   return success;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 83a37f5b800..beccb381ee2 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -93,7 +93,8 @@ struct brw_vs_compile {
    GLboolean needs_stack;
 };
 
-bool brw_vs_emit(struct brw_vs_compile *c);
+bool brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c);
 void brw_old_vs_emit(struct brw_vs_compile *c);
+bool brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog);
 
 #endif

From 7642c1de6b65b7dfd9e39904291cc9737cd54b56 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 11 Aug 2011 08:09:10 -0700
Subject: [PATCH 401/600] i965/vs: Avoid generating a MOV for most
 ir_assignment handling.

Removes an average of 11.5% of instructions in 54% of vertex shaders
in shader-db.
---
 src/mesa/drivers/dri/i965/brw_vec4.h          |  6 ++
 .../drivers/dri/i965/brw_vec4_visitor.cpp     | 67 +++++++++++++++++++
 2 files changed, 73 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 082021513d2..620b05570a6 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -388,6 +388,12 @@ public:
    vec4_instruction *emit(enum opcode opcode, dst_reg dst,
 			  src_reg src0, src_reg src1, src_reg src2);
 
+   bool try_rewrite_rhs_to_dst(ir_assignment *ir,
+			       dst_reg dst,
+			       src_reg src,
+			       vec4_instruction *pre_rhs_inst,
+			       vec4_instruction *last_rhs_inst);
+
    /** Walks an exec_list of ir_instruction and sends it through this visitor. */
    void visit_instructions(const exec_list *list);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index b1792a8ee16..ae733810757 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1342,6 +1342,63 @@ vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
    src->reg_offset++;
 }
 
+
+/* If the RHS processing resulted in an instruction generating a
+ * temporary value, and it would be easy to rewrite the instruction to
+ * generate its result right into the LHS instead, do so.  This ends
+ * up reliably removing instructions where it can be tricky to do so
+ * later without real UD chain information.
+ */
+bool
+vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
+				     dst_reg dst,
+				     src_reg src,
+				     vec4_instruction *pre_rhs_inst,
+				     vec4_instruction *last_rhs_inst)
+{
+   /* This could be supported, but it would take more smarts. */
+   if (ir->condition)
+      return false;
+
+   if (pre_rhs_inst == last_rhs_inst)
+      return false; /* No instructions generated to work with. */
+
+   /* Make sure the last instruction generated our source reg. */
+   if (src.file != GRF ||
+       src.file != last_rhs_inst->dst.file ||
+       src.reg != last_rhs_inst->dst.reg ||
+       src.reg_offset != last_rhs_inst->dst.reg_offset ||
+       src.reladdr ||
+       src.abs ||
+       src.negate ||
+       last_rhs_inst->predicate != BRW_PREDICATE_NONE)
+      return false;
+
+   /* Check that that last instruction fully initialized the channels
+    * we want to use, in the order we want to use them.  We could
+    * potentially reswizzle the operands of many instructions so that
+    * we could handle out of order channels, but don't yet.
+    */
+   for (int i = 0; i < 4; i++) {
+      if (dst.writemask & (1 << i)) {
+	 if (!(last_rhs_inst->dst.writemask & (1 << i)))
+	    return false;
+
+	 if (BRW_GET_SWZ(src.swizzle, i) != i)
+	    return false;
+      }
+   }
+
+   /* Success!  Rewrite the instruction. */
+   last_rhs_inst->dst.file = dst.file;
+   last_rhs_inst->dst.reg = dst.reg;
+   last_rhs_inst->dst.reg_offset = dst.reg_offset;
+   last_rhs_inst->dst.reladdr = dst.reladdr;
+   last_rhs_inst->dst.writemask &= dst.writemask;
+
+   return true;
+}
+
 void
 vec4_visitor::visit(ir_assignment *ir)
 {
@@ -1363,7 +1420,13 @@ vec4_visitor::visit(ir_assignment *ir)
    /* Now we're down to just a scalar/vector with writemasks. */
    int i;
 
+   vec4_instruction *pre_rhs_inst, *last_rhs_inst;
+   pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
+
    ir->rhs->accept(this);
+
+   last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
+
    src_reg src = this->result;
 
    int swizzles[4];
@@ -1396,6 +1459,10 @@ vec4_visitor::visit(ir_assignment *ir)
    src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
 			      swizzles[2], swizzles[3]);
 
+   if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
+      return;
+   }
+
    if (ir->condition) {
       emit_bool_to_cond_code(ir->condition);
    }

From 54e66a0a6327b55f15a7c641ec68da505ff19a35 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 11 Aug 2011 16:27:41 -0700
Subject: [PATCH 402/600] i965/vs: Fix abs/negate handling on attributes.

Fixes glsl-vs-neg-attribute and glsl-vs-abs-attribute.
---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 9ef6ab6de90..6b0ae42e0e0 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -65,9 +65,16 @@ vec4_visitor::setup_attributes(int payload_reg)
 	    continue;
 
 	 int grf = attribute_map[inst->src[i].reg + inst->src[i].reg_offset];
+
+	 struct brw_reg reg = brw_vec8_grf(grf, 0);
+	 reg.dw1.bits.swizzle = inst->src[i].swizzle;
+	 if (inst->src[i].abs)
+	    reg = brw_abs(reg);
+	 if (inst->src[i].negate)
+	    reg = negate(reg);
+
 	 inst->src[i].file = HW_REG;
-	 inst->src[i].fixed_hw_reg = brw_vec8_grf(grf, 0);
-	 inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle;
+	 inst->src[i].fixed_hw_reg = reg;
       }
    }
 

From 905f3d03090c7b86e410959c5640054f5f6894ef Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 12 Aug 2011 05:15:50 -0700
Subject: [PATCH 403/600] i965/vs: Remove remaining use of foreach_iter.

---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp    | 10 +++-------
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |  4 ++--
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 6b0ae42e0e0..fca31b6dec9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -57,8 +57,8 @@ vec4_visitor::setup_attributes(int payload_reg)
       }
    }
 
-   foreach_iter(exec_list_iterator, iter, this->instructions) {
-      vec4_instruction *inst = (vec4_instruction *)iter.get();
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
 
       for (int i = 0; i < 3; i++) {
 	 if (inst->src[i].file != ATTR)
@@ -546,11 +546,7 @@ vec4_visitor::run()
    /* Generate VS IR for main().  (the visitor only descends into
     * functions called "main").
     */
-   foreach_iter(exec_list_iterator, iter, *shader->ir) {
-      ir_instruction *ir = (ir_instruction *)iter.get();
-      base_ir = ir;
-      ir->accept(this);
-   }
+   visit_instructions(shader->ir);
 
    emit_urb_writes();
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index ae733810757..fc75cc35172 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -234,8 +234,8 @@ vec4_visitor::emit_math(enum opcode opcode,
 void
 vec4_visitor::visit_instructions(const exec_list *list)
 {
-   foreach_iter(exec_list_iterator, iter, *list) {
-      ir_instruction *ir = (ir_instruction *)iter.get();
+   foreach_list(node, list) {
+      ir_instruction *ir = (ir_instruction *)node;
 
       base_ir = ir;
       ir->accept(this);

From d0c595ac8032aa9aed402a513870b8dc92e42903 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 12 Aug 2011 05:28:53 -0700
Subject: [PATCH 404/600] i965/gen6: Force WHILE exec size to 8.

We can't just look at the instruction that happens to appear at the
start of the loop, because it might be some other exec size and cause
us to only loop on the first N channels.  We always want 8 in our
current code (since 16 doesn't work so we don't do 16-wide fragment in
that case).

Fixes loop-03.vert, which was triggering the assertions.
---
 src/mesa/drivers/dri/i965/brw_eu_emit.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index f5cc09dd49b..27e81306e9c 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1341,8 +1341,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
       brw_set_src1(p, insn, brw_imm_ud(0));
       insn->bits3.break_cont.jip = br * (do_insn - insn);
 
-      insn->header.execution_size = do_insn->header.execution_size;
-      assert(insn->header.execution_size == BRW_EXECUTE_8);
+      insn->header.execution_size = BRW_EXECUTE_8;
    } else if (intel->gen == 6) {
       insn = next_insn(p, BRW_OPCODE_WHILE);
 
@@ -1351,8 +1350,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
 
-      insn->header.execution_size = do_insn->header.execution_size;
-      assert(insn->header.execution_size == BRW_EXECUTE_8);
+      insn->header.execution_size = BRW_EXECUTE_8;
    } else {
       if (p->single_program_flow) {
 	 insn = next_insn(p, BRW_OPCODE_ADD);

From 8a649277cb57cc13fb38f8e8daf07e8a2b96223c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 12 Aug 2011 05:32:25 -0700
Subject: [PATCH 405/600] i965/vs: Don't assertion fail on vertex texturing.

The linker will reject the program, but we need to survive until then.
Fixes abort in glsl1-2D Texture lookup with explicit lod (Vertex
shader)
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index fc75cc35172..d03fbff27fc 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1554,7 +1554,12 @@ vec4_visitor::visit(ir_call *ir)
 void
 vec4_visitor::visit(ir_texture *ir)
 {
-   assert(!"not reached");
+   /* FINISHME: Implement vertex texturing.
+    *
+    * With 0 vertex samplers available, the linker will reject
+    * programs that do vertex texturing, but after our visitor has
+    * run.
+    */
 }
 
 void

From feff7c62ce446f4e3bb755a2f40dcbd0e70155e4 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 15 Aug 2011 20:13:53 -0700
Subject: [PATCH 406/600] i965/vs: Fix condition code for scalar expression
 all_equals.

Fixes vs-op-eq-bool-bool.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index d03fbff27fc..3ae89dfbc45 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -985,7 +985,7 @@ vec4_visitor::visit(ir_expression *ir)
 	    temp.type = op[0].type;
 
 	 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
-	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 inst->conditional_mod = BRW_CONDITIONAL_Z;
 	 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
       }
       break;

From e9a86ae3370948acb1276e80fbbc421d7025db36 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 15 Aug 2011 20:43:42 -0700
Subject: [PATCH 407/600] i965/vs: Fix memory leak of ralloc context for the
 visitor.

---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 3ae89dfbc45..185a01e05f9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2104,6 +2104,7 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
 
 vec4_visitor::~vec4_visitor()
 {
+   ralloc_free(this->mem_ctx);
    hash_table_dtor(this->variable_ht);
 }
 

From 7bf70c29adf175f51d0347d0187aecc0e9bbbcb8 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 15 Aug 2011 20:59:24 -0700
Subject: [PATCH 408/600] i965/vs: Add support for conversion of FIXED_HW_REG
 src_reg to/from dst_reg.

This was quietly occurring in some emit code I produced, and failed.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 185a01e05f9..621cb53ff84 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -38,6 +38,7 @@ src_reg::src_reg(dst_reg reg)
    this->reg_offset = reg.reg_offset;
    this->type = reg.type;
    this->reladdr = reg.reladdr;
+   this->fixed_hw_reg = reg.fixed_hw_reg;
 
    int swizzles[4];
    int next_chan = 0;
@@ -68,6 +69,7 @@ dst_reg::dst_reg(src_reg reg)
    this->type = reg.type;
    this->writemask = WRITEMASK_XYZW;
    this->reladdr = reg.reladdr;
+   this->fixed_hw_reg = reg.fixed_hw_reg;
 }
 
 vec4_instruction *

From 0ddf0f1c3451eef8a7c7f46afca623dc4f7c5af6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 15 Aug 2011 21:02:10 -0700
Subject: [PATCH 409/600] i965/vs: Fix multiplies to actually do 32-bit
 multiplies.

Fixes vs-op-mult-int-int and friends.
---
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp    |  5 +++++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 18 +++++++++++++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index fca31b6dec9..011af6f2d3e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -633,6 +633,11 @@ vec4_visitor::generate_code()
       case BRW_OPCODE_MUL:
 	 brw_MUL(p, dst, src[0], src[1]);
 	 break;
+      case BRW_OPCODE_MACH:
+	 brw_set_acc_write_control(p, 1);
+	 brw_MACH(p, dst, src[0], src[1]);
+	 brw_set_acc_write_control(p, 0);
+	 break;
 
       case BRW_OPCODE_FRC:
 	 brw_FRC(p, dst, src[0]);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 621cb53ff84..a60fc5f6ada 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -945,7 +945,23 @@ vec4_visitor::visit(ir_expression *ir)
       break;
 
    case ir_binop_mul:
-      emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
+      if (ir->type->is_integer()) {
+	 /* For integer multiplication, the MUL uses the low 16 bits
+	  * of one of the operands (src0 on gen6, src1 on gen7).  The
+	  * MACH accumulates in the contribution of the upper 16 bits
+	  * of that operand.
+	  *
+	  * FINISHME: Emit just the MUL if we know an operand is small
+	  * enough.
+	  */
+	 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
+
+	 emit(BRW_OPCODE_MUL, acc, op[0], op[1]);
+	 emit(BRW_OPCODE_MACH, dst_null_d(), op[0], op[1]);
+	 emit(BRW_OPCODE_MOV, result_dst, src_reg(acc));
+      } else {
+	 emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
+      }
       break;
    case ir_binop_div:
       assert(!"not reached: should be handled by ir_div_to_mul_rcp");

From eb0ff1a1c0f1978d867c748bf2525f717a56bfce Mon Sep 17 00:00:00 2001
From: Chad Versace <chad@chad-versace.us>
Date: Mon, 15 Aug 2011 10:58:25 -0700
Subject: [PATCH 410/600] mesa: Remove use of fpu_control.h

Remove the inclusion of fpu_control.h from compiler.h.  Since Bionic lacks
fpu_control.h, this fixes the Android build.

Also remove the sole use of the fpu_control bits, which was in debug.c.
Those were brianp's debug bits, and he approved of their removal.

Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Chad Versace <chad@chad-versace.us>
---
 src/mesa/main/compiler.h |  3 ---
 src/mesa/main/debug.c    | 11 -----------
 2 files changed, 14 deletions(-)

diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h
index ee7d0b2f880..8ed1c6fa61f 100644
--- a/src/mesa/main/compiler.h
+++ b/src/mesa/main/compiler.h
@@ -45,9 +45,6 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
-#if defined(__linux__) && defined(__i386__)
-#include <fpu_control.h>
-#endif
 #include <float.h>
 #include <stdarg.h>
 
diff --git a/src/mesa/main/debug.c b/src/mesa/main/debug.c
index e7f6be99481..b1fc096f296 100644
--- a/src/mesa/main/debug.c
+++ b/src/mesa/main/debug.c
@@ -192,17 +192,6 @@ static void add_debug_flags( const char *debug )
    if (strstr(debug, "flush"))
       MESA_DEBUG_FLAGS |= DEBUG_ALWAYS_FLUSH;
 
-#if defined(_FPU_GETCW) && defined(_FPU_SETCW)
-   if (strstr(debug, "fpexceptions")) {
-      /* raise FP exceptions */
-      fpu_control_t mask;
-      _FPU_GETCW(mask);
-      mask &= ~(_FPU_MASK_IM | _FPU_MASK_DM | _FPU_MASK_ZM
-                | _FPU_MASK_OM | _FPU_MASK_UM);
-      _FPU_SETCW(mask);
-   }
-#endif
-
 #else
    (void) debug;
 #endif

From bd064a49f119d126623c0e85702801e4cee62187 Mon Sep 17 00:00:00 2001
From: Chad Versace <chad@chad-versace.us>
Date: Mon, 15 Aug 2011 13:26:21 -0700
Subject: [PATCH 411/600] mesa: Fix Android build by #ifdef'ing out locale
 support

Bionic does not support locales. This commit #ifdef's out the locale usage
in _mesa_strtof().

Signed-off-by: Chad Versace <chad@chad-versace.us>
---
 src/mesa/main/imports.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c
index 0a572ec225d..8f097195922 100644
--- a/src/mesa/main/imports.c
+++ b/src/mesa/main/imports.c
@@ -753,7 +753,8 @@ _mesa_strdup( const char *s )
 float
 _mesa_strtof( const char *s, char **end )
 {
-#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__)
+#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__) && \
+    !defined(ANDROID)
    static locale_t loc = NULL;
    if (!loc) {
       loc = newlocale(LC_CTYPE_MASK, "C", NULL);

From 3c9f172fe801a8e954a40affc38942b628b81bda Mon Sep 17 00:00:00 2001
From: Chad Versace <chad@chad-versace.us>
Date: Mon, 15 Aug 2011 13:29:15 -0700
Subject: [PATCH 412/600] mesa: Add Android to list of platforms that define
 fpclassify()

This is a fix for the Android build.

Signed-off-by: Chad Versace <chad@chad-versace.us>
---
 src/mesa/main/querymatrix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/querymatrix.c b/src/mesa/main/querymatrix.c
index 944ad435f7a..eaedf7cd238 100644
--- a/src/mesa/main/querymatrix.c
+++ b/src/mesa/main/querymatrix.c
@@ -73,7 +73,7 @@ fpclassify(double x)
 #elif defined(__APPLE__) || defined(__CYGWIN__) || defined(__FreeBSD__) || \
      defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \
      (defined(__sun) && defined(__C99FEATURES__)) || defined(__MINGW32__) || \
-     (defined(__sun) && defined(__GNUC__))
+     (defined(__sun) && defined(__GNUC__)) || defined(ANDROID)
 
 /* fpclassify is available. */
 

From 6ad08989d7c10892919ce1cb9c88c4cf8b73e1dc Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sat, 30 Jul 2011 10:48:10 -0700
Subject: [PATCH 413/600] ir_to_mesa: Implement ir_unop_logic_not using 1-x

Since our logic values are 0.0 (false) and 1.0 (true), 1.0 - x
accurately implements logical not.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/ir_to_mesa.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 1ef609fe15d..f03ea7a95e0 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1135,7 +1135,13 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
 
    switch (ir->operation) {
    case ir_unop_logic_not:
-      emit(ir, OPCODE_SEQ, result_dst, op[0], src_reg_for_float(0.0));
+      /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
+       * older GPUs implement SEQ using multiple instructions (i915 uses two
+       * SGE instructions and a MUL instruction).  Since our logic values are
+       * 0.0 and 1.0, 1-x also implements !x.
+       */
+      op[0].negate = ~op[0].negate;
+      emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0));
       break;
    case ir_unop_neg:
       op[0].negate = ~op[0].negate;

From 41f8ffe5e07c4f389eb13d17ecf0ff776890e9bc Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sat, 30 Jul 2011 10:49:49 -0700
Subject: [PATCH 414/600] ir_to_mesa: Implement ir_binop_logic_or using an add
 w/saturate or add w/SLT

Logical-or is implemented using addition (followed by clampling to
[0,1]) on values of 0.0 and 1.0.  Replacing the logical-or operators
with addition gives a + b which has a result on the range [0, 2].

Previously a SNE instruction was used to clamp the resulting logic
value to [0,1].  In a fragment shader, using a saturate on the add has
the same effect.  Adding the saturate to the add is free, so (at
least) one instruction is saved.

In a vertex shader, using an SLT on the negation of the add result has
the same effect.  Many older shader architectures do not support the
SNE instruction.  It must be emulated using two SLT instructions and
an ADD.  On these architectures, the single SLT saves two
instructions.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/ir_to_mesa.cpp | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index f03ea7a95e0..fcd14c89cd7 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1267,11 +1267,28 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
       emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
       break;
 
-   case ir_binop_logic_or:
-      /* This could be a saturated add and skip the SNE. */
-      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
-      emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0));
+   case ir_binop_logic_or: {
+      /* After the addition, the value will be an integer on the
+       * range [0,2].  Zero stays zero, and positive values become 1.0.
+       */
+      ir_to_mesa_instruction *add =
+	 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
+      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+	 /* The clamping to [0,1] can be done for free in the fragment
+	  * shader with a saturate.
+	  */
+	 add->saturate = true;
+      } else {
+	 /* Negating the result of the addition gives values on the range
+	  * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
+	  * is achieved using SLT.
+	  */
+	 src_reg slt_src = result_src;
+	 slt_src.negate = ~slt_src.negate;
+	 emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
+      }
       break;
+   }
 
    case ir_binop_logic_and:
       /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */

From 7f4c65256cc3f4d9f6a214424beabe688a5dd6a2 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sat, 30 Jul 2011 10:45:35 -0700
Subject: [PATCH 415/600] ir_to_mesa: Make ir_to_mesa_visitor::emit_dp return
 the instruction

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/ir_to_mesa.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index fcd14c89cd7..60d498bd9e3 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -297,11 +297,11 @@ public:
    /**
     * Emit the correct dot-product instruction for the type of arguments
     */
-   void emit_dp(ir_instruction *ir,
-	        dst_reg dst,
-	        src_reg src0,
-	        src_reg src1,
-	        unsigned elements);
+   ir_to_mesa_instruction * emit_dp(ir_instruction *ir,
+				    dst_reg dst,
+				    src_reg src0,
+				    src_reg src1,
+				    unsigned elements);
 
    void emit_scalar(ir_instruction *ir, enum prog_opcode op,
 		    dst_reg dst, src_reg src0);
@@ -408,7 +408,7 @@ ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op)
    return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
 }
 
-void
+ir_to_mesa_instruction *
 ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
 			    dst_reg dst, src_reg src0, src_reg src1,
 			    unsigned elements)
@@ -417,7 +417,7 @@ ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
       OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
    };
 
-   emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
+   return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
 }
 
 /**

From 92ca560d68e8a6b532998707afcf4f60c0ce2806 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 3 Aug 2011 15:27:43 -0700
Subject: [PATCH 416/600] ir_to_mesa: Implement ir_unop_any using DP4
 w/saturate or DP4 w/SLT

This is just like the ir_binop_logic_or case.  The operation
ir_unop_any is (a.x || a.y || a.z || a.w).  Logical-or is implemented
using addition (followed by clampling to [0,1]) on values of 0.0 and
1.0.  Replacing the logical-or operators with addition gives (a.x +
a.y + a.z + a.w).  This can be implemented using a dot-product with a
vector of all 1.0.

Previously a SNE instruction was used to clamp the resulting logic
value to [0,1].  In a fragment shader, using a saturate on the
dot-product has the same effect.  Adding the saturate to the
dot-product is free, so (at least) one instruction is saved.

In a vertex shader, using an SLT on the negation of the dot-product
result has the same effect.  Many older shader architectures do not
support the SNE instruction.  It must be emulated using two SLT
instructions and an ADD.  On these architectures, the single SLT saves
two instructions.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/ir_to_mesa.cpp | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 60d498bd9e3..1bd9a2eee1b 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1256,12 +1256,31 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
       }
       break;
 
-   case ir_unop_any:
+   case ir_unop_any: {
       assert(ir->operands[0]->type->is_vector());
-      emit_dp(ir, result_dst, op[0], op[0],
-	      ir->operands[0]->type->vector_elements);
-      emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0));
+
+      /* After the dot-product, the value will be an integer on the
+       * range [0,4].  Zero stays zero, and positive values become 1.0.
+       */
+      ir_to_mesa_instruction *const dp =
+	 emit_dp(ir, result_dst, op[0], op[0],
+		 ir->operands[0]->type->vector_elements);
+      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+	 /* The clamping to [0,1] can be done for free in the fragment
+	  * shader with a saturate.
+	  */
+	 dp->saturate = true;
+      } else {
+	 /* Negating the result of the dot-product gives values on the range
+	  * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
+	  * is achieved using SLT.
+	  */
+	 src_reg slt_src = result_src;
+	 slt_src.negate = ~slt_src.negate;
+	 emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
+      }
       break;
+   }
 
    case ir_binop_logic_xor:
       emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);

From e7bf096e8b04931996c8c56548ce0b2c0af3a0dc Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 3 Aug 2011 15:35:01 -0700
Subject: [PATCH 417/600] ir_to_mesa: Implement ir_binop_any_nequal using DP4
 w/saturate or DP4 w/SLT

The operation ir_binop_any_nequal is (a.x != b.x) || (a.y != b.y) ||
(a.z != b.z) || (a.w != b.w), and that is the same as any(bvec4(a.x !=
b.x, a.y != b.y, a.z != b.z, a.w != b.w)).  Implement the any() part
the same way the regular ir_unop_any is implemented.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/ir_to_mesa.cpp | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 1bd9a2eee1b..1c674ea8756 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1249,8 +1249,26 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
 	  ir->operands[1]->type->is_vector()) {
 	 src_reg temp = get_temp(glsl_type::vec4_type);
 	 emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
-	 emit_dp(ir, result_dst, temp, temp, vector_elements);
-	 emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0));
+
+	 /* After the dot-product, the value will be an integer on the
+	  * range [0,4].  Zero stays zero, and positive values become 1.0.
+	  */
+	 ir_to_mesa_instruction *const dp =
+	    emit_dp(ir, result_dst, temp, temp, vector_elements);
+	 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+	    /* The clamping to [0,1] can be done for free in the fragment
+	     * shader with a saturate.
+	     */
+	    dp->saturate = true;
+	 } else {
+	    /* Negating the result of the dot-product gives values on the range
+	     * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
+	     * achieved using SLT.
+	     */
+	    src_reg slt_src = result_src;
+	    slt_src.negate = ~slt_src.negate;
+	    emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
+	 }
       } else {
 	 emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
       }

From ba01df11c4d09c65514a8522cb319e29034ab5a8 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 3 Aug 2011 15:42:05 -0700
Subject: [PATCH 418/600] ir_to_mesa: Implement ir_binop_all_equal using DP4
 w/SGE

The operation ir_binop_all_equal is !(a.x != b.x || a.y != b.y || a.z
!= b.z || a.w != b.w).  Logical-or is implemented using addition
(followed by clampling to [0,1]) on values of 0.0 and 1.0.  Replacing
the logical-or operators with addition gives !bool((int(a.x != b.x) +
int(a.y == b.y) + int(a.z == b.z) + int(a.w == b.w)).  This can be
implemented using a dot-product with a vector of all 1.0.  After the
dot-product, the value will be an integer on the range [0,4].

Previously a SEQ instruction was used to clamp the resulting logic
value to [0,1] and invert the result.  Using an SGE instruction on the
negation of the dot-product result has the same effect.  Many older
shader architectures do not support the SEQ instruction.  It must be
emulated using two SGE instructions and a MUL.  On these
architectures, the single SGE saves two instructions.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/ir_to_mesa.cpp | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 1c674ea8756..4c8b097de6b 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1237,8 +1237,19 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
 	  ir->operands[1]->type->is_vector()) {
 	 src_reg temp = get_temp(glsl_type::vec4_type);
 	 emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
+
+	 /* After the dot-product, the value will be an integer on the
+	  * range [0,4].  Zero becomes 1.0, and positive values become zero.
+	  */
 	 emit_dp(ir, result_dst, temp, temp, vector_elements);
-	 emit(ir, OPCODE_SEQ, result_dst, result_src, src_reg_for_float(0.0));
+
+	 /* Negating the result of the dot-product gives values on the range
+	  * [-4, 0].  Zero becomes 1.0, and negative values become zero.  This
+	  * achieved using SGE.
+	  */
+	 src_reg sge_src = result_src;
+	 sge_src.negate = ~sge_src.negate;
+	 emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0));
       } else {
 	 emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
       }

From ff2cfb8989cd79218dfe2cd8c3de20f1ca7418e6 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 2 Aug 2011 12:17:20 -0700
Subject: [PATCH 419/600] ir_to_mesa: Emit a MAD(b, -a, b) for !a && b

!a && b occurs frequently when nexted if-statements have been
flattened.  It should also be possible use a MAD for (a && b) || c,
though that would require a MAD_SAT.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/ir_to_mesa.cpp | 52 +++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 4c8b097de6b..b1211c1145c 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -314,6 +314,8 @@ public:
 
    GLboolean try_emit_mad(ir_expression *ir,
 			  int mul_operand);
+   bool try_emit_mad_for_and_not(ir_expression *ir,
+				 int mul_operand);
    GLboolean try_emit_sat(ir_expression *ir);
 
    void emit_swz(ir_expression *ir);
@@ -892,6 +894,46 @@ ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
    return true;
 }
 
+/**
+ * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b))
+ *
+ * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
+ * implemented using multiplication, and logical-or is implemented using
+ * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
+ * As result, the logical expression (a & !b) can be rewritten as:
+ *
+ *     - a * !b
+ *     - a * (1 - b)
+ *     - (a * 1) - (a * b)
+ *     - a + -(a * b)
+ *     - a + (a * -b)
+ *
+ * This final expression can be implemented as a single MAD(a, -b, a)
+ * instruction.
+ */
+bool
+ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
+{
+   const int other_operand = 1 - try_operand;
+   src_reg a, b;
+
+   ir_expression *expr = ir->operands[try_operand]->as_expression();
+   if (!expr || expr->operation != ir_unop_logic_not)
+      return false;
+
+   ir->operands[other_operand]->accept(this);
+   a = this->result;
+   expr->operands[0]->accept(this);
+   b = this->result;
+
+   b.negate = ~b.negate;
+
+   this->result = get_temp(ir->type);
+   emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a);
+
+   return true;
+}
+
 GLboolean
 ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
 {
@@ -1088,6 +1130,16 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
       if (try_emit_mad(ir, 0))
 	 return;
    }
+
+   /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
+    */
+   if (ir->operation == ir_binop_logic_and) {
+      if (try_emit_mad_for_and_not(ir, 1))
+	 return;
+      if (try_emit_mad_for_and_not(ir, 0))
+	 return;
+   }
+
    if (try_emit_sat(ir))
       return;
 

From 54c48a95e6e0573886433f94ac83293876ffe03d Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Thu, 10 Feb 2011 15:48:27 -0800
Subject: [PATCH 420/600] mesa: Add partial constant propagation pass for Mesa
 IR

This cleans up some code generated by the IR-to-Mesa pass for i915.
In particular, some shaders involving arrays of constant matrices
result in really bad code.

v2: Silence several warnings from merging the gl_constant_value work.
Fix DP[23] folding.  Add support for a bunch more opcodes that appear
in piglit runs on i915.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/SConscript                       |   1 +
 src/mesa/program/prog_opt_constant_fold.c | 451 ++++++++++++++++++++++
 src/mesa/program/prog_optimize.c          |   2 +
 src/mesa/program/prog_optimize.h          |   3 +
 src/mesa/sources.mak                      |   1 +
 5 files changed, 458 insertions(+)
 create mode 100644 src/mesa/program/prog_opt_constant_fold.c

diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index 05aa0e8010e..b0c3334fa48 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -293,6 +293,7 @@ program_sources = [
     'program/prog_instruction.c',
     'program/prog_noise.c',
     'program/prog_optimize.c',
+    'program/prog_opt_constant_fold.c',
     'program/prog_parameter.c',
     'program/prog_parameter_layout.c',
     'program/prog_print.c',
diff --git a/src/mesa/program/prog_opt_constant_fold.c b/src/mesa/program/prog_opt_constant_fold.c
new file mode 100644
index 00000000000..e2418b55451
--- /dev/null
+++ b/src/mesa/program/prog_opt_constant_fold.c
@@ -0,0 +1,451 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "program.h"
+#include "prog_instruction.h"
+#include "prog_optimize.h"
+#include "prog_parameter.h"
+#include <stdbool.h>
+
+static bool
+src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs)
+{
+   unsigned i;
+
+   for (i = 0; i < num_srcs; i++) {
+      if (inst->SrcReg[i].File != PROGRAM_CONSTANT)
+	 return false;
+   }
+
+   return true;
+}
+
+static struct prog_src_register
+src_reg_for_float(struct gl_program *prog, float val)
+{
+   struct prog_src_register src;
+   unsigned swiz;
+
+   memset(&src, 0, sizeof(src));
+
+   src.File = PROGRAM_CONSTANT;
+   src.Index = _mesa_add_unnamed_constant(prog->Parameters,
+					  (gl_constant_value *) &val, 1, &swiz);
+   src.Swizzle = swiz;
+   return src;
+}
+
+static struct prog_src_register
+src_reg_for_vec4(struct gl_program *prog, const float *val)
+{
+   struct prog_src_register src;
+   unsigned swiz;
+
+   memset(&src, 0, sizeof(src));
+
+   src.File = PROGRAM_CONSTANT;
+   src.Index = _mesa_add_unnamed_constant(prog->Parameters,
+					  (gl_constant_value *) val, 4, &swiz);
+   src.Swizzle = swiz;
+   return src;
+}
+
+static bool
+src_regs_are_same(const struct prog_src_register *a,
+		  const struct prog_src_register *b)
+{
+   return (a->File == b->File)
+      && (a->Index == b->Index)
+      && (a->Swizzle == b->Swizzle)
+      && (a->Abs == b->Abs)
+      && (a->Negate == b->Negate)
+      && (a->RelAddr == 0)
+      && (b->RelAddr == 0);
+}
+
+static void
+get_value(struct gl_program *prog, struct prog_src_register *r, float *data)
+{
+   const gl_constant_value *const value =
+      prog->Parameters->ParameterValues[r->Index];
+
+   data[0] = value[GET_SWZ(r->Swizzle, 0)].f;
+   data[1] = value[GET_SWZ(r->Swizzle, 1)].f;
+   data[2] = value[GET_SWZ(r->Swizzle, 2)].f;
+   data[3] = value[GET_SWZ(r->Swizzle, 3)].f;
+
+   if (r->Abs) {
+      data[0] = fabsf(data[0]);
+      data[1] = fabsf(data[1]);
+      data[2] = fabsf(data[2]);
+      data[3] = fabsf(data[3]);
+   }
+
+   if (r->Negate & 0x01) {
+      data[0] = -data[0];
+   }
+
+   if (r->Negate & 0x02) {
+      data[1] = -data[1];
+   }
+
+   if (r->Negate & 0x04) {
+      data[2] = -data[2];
+   }
+
+   if (r->Negate & 0x08) {
+      data[3] = -data[3];
+   }
+}
+
+/**
+ * Try to replace instructions that produce a constant result with simple moves
+ *
+ * The hope is that a following copy propagation pass will eliminate the
+ * unnecessary move instructions.
+ */
+GLboolean
+_mesa_constant_fold(struct gl_program *prog)
+{
+   bool progress = false;
+   unsigned i;
+
+   for (i = 0; i < prog->NumInstructions; i++) {
+      struct prog_instruction *const inst = &prog->Instructions[i];
+
+      switch (inst->Opcode) {
+      case OPCODE_ADD:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = a[0] + b[0];
+	    result[1] = a[1] + b[1];
+	    result[2] = a[2] + b[2];
+	    result[3] = a[3] + b[3];
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_CMP:
+	 /* FINISHME: We could also optimize CMP instructions where the first
+	  * FINISHME: source is a constant that is either all < 0.0 or all
+	  * FINISHME: >= 0.0.
+	  */
+	 if (src_regs_are_constant(inst, 3)) {
+	    float a[4];
+	    float b[4];
+	    float c[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+	    get_value(prog, &inst->SrcReg[2], c);
+
+            result[0] = a[0] < 0.0f ? b[0] : c[0];
+            result[1] = a[1] < 0.0f ? b[1] : c[1];
+            result[2] = a[2] < 0.0f ? b[2] : c[2];
+            result[3] = a[3] < 0.0f ? b[3] : c[3];
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+	    inst->SrcReg[2].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_DP2:
+      case OPCODE_DP3:
+      case OPCODE_DP4:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result;
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    /* It seems like a loop could be used here, but we cleverly put
+	     * DP2A between DP2 and DP3.  Subtracting DP2 (or similar) from
+	     * the opcode results in various failures of the loop control.
+	     */
+	    result = (a[0] * b[0]) + (a[1] * b[1]);
+
+	    if (inst->Opcode >= OPCODE_DP3)
+	       result += a[2] * b[2];
+
+	    if (inst->Opcode == OPCODE_DP4)
+	       result += a[3] * b[3];
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_MUL:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = a[0] * b[0];
+	    result[1] = a[1] * b[1];
+	    result[2] = a[2] * b[2];
+	    result[3] = a[3] * b[3];
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SEQ:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] == b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] == b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] == b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] == b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SGE:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SGT:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] > b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] > b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] > b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] > b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SLE:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] <= b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] <= b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] <= b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] <= b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SLT:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] < b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] < b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] < b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] < b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SNE:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] != b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] != b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] != b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] != b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      default:
+	 break;
+      }
+   }
+
+   return progress;
+}
diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c
index 3340ce0498b..25d9684b137 100644
--- a/src/mesa/program/prog_optimize.c
+++ b/src/mesa/program/prog_optimize.c
@@ -1358,6 +1358,8 @@ _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program)
          any_change = GL_TRUE;
       if (_mesa_remove_dead_code_local(program))
          any_change = GL_TRUE;
+
+      any_change = _mesa_constant_fold(program) || any_change;
       _mesa_reallocate_registers(program);
    } while (any_change);
 }
diff --git a/src/mesa/program/prog_optimize.h b/src/mesa/program/prog_optimize.h
index 463f5fc51c4..9854fb7a491 100644
--- a/src/mesa/program/prog_optimize.h
+++ b/src/mesa/program/prog_optimize.h
@@ -44,4 +44,7 @@ _mesa_find_temp_intervals(const struct prog_instruction *instructions,
 extern void
 _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program);
 
+extern GLboolean
+_mesa_constant_fold(struct gl_program *prog);
+
 #endif
diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak
index ed008f8813e..5e77e0f5919 100644
--- a/src/mesa/sources.mak
+++ b/src/mesa/sources.mak
@@ -251,6 +251,7 @@ PROGRAM_SOURCES = \
 	program/prog_instruction.c \
 	program/prog_noise.c \
 	program/prog_optimize.c \
+	program/prog_opt_constant_fold.c \
 	program/prog_parameter.c \
 	program/prog_parameter_layout.c \
 	program/prog_print.c \

From 7125f1e87df359be4aad1d801b633146eeac7292 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 3 Aug 2011 17:12:29 -0700
Subject: [PATCH 421/600] mesa: Bump instruction execution limit to 65536

Shader Model 3.0[1] requires that shaders be able to execute at least
65536 instructions.  Bump Mesa maxExec to that limit.  This allows
several vertex shaders in the OpenGL ES 2.0 conformance test suite to
run to completion.

1: http://en.wikipedia.org/wiki/High_Level_Shader_Language

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/program/prog_execute.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c
index dbfd1b91875..c70a1e344e5 100644
--- a/src/mesa/program/prog_execute.c
+++ b/src/mesa/program/prog_execute.c
@@ -639,7 +639,7 @@ _mesa_execute_program(struct gl_context * ctx,
                       struct gl_program_machine *machine)
 {
    const GLuint numInst = program->NumInstructions;
-   const GLuint maxExec = 10000;
+   const GLuint maxExec = 65536;
    GLuint pc, numExec = 0;
 
    machine->CurProgram = program;

From b629d5ba24f76ed6af35455a874d351fde1e5bbe Mon Sep 17 00:00:00 2001
From: Lauri Kasanen <cand@gmx.com>
Date: Fri, 1 Jul 2011 13:49:18 +0300
Subject: [PATCH 422/600] xmlconfig: Make the error message more informative

---
 src/mesa/drivers/dri/common/xmlconfig.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c
index 77967ac2a43..12dd31bb162 100644
--- a/src/mesa/drivers/dri/common/xmlconfig.c
+++ b/src/mesa/drivers/dri/common/xmlconfig.c
@@ -567,7 +567,7 @@ static void parseOptInfoAttr (struct OptInfoData *data, const XML_Char **attr) {
     } else
 	defaultVal = attrVal[OA_DEFAULT];
     if (!parseValue (&cache->values[opt], cache->info[opt].type, defaultVal))
-	XML_FATAL ("illegal default value: %s.", defaultVal);
+	XML_FATAL ("illegal default value for %s: %s.", cache->info[opt].name, defaultVal);
 
     if (attrVal[OA_VALID]) {
 	if (cache->info[opt].type == DRI_BOOL)

From 59e56957cce16e5d993974e4b7f339afc9cb949b Mon Sep 17 00:00:00 2001
From: Lauri Kasanen <cand@gmx.com>
Date: Fri, 1 Jul 2011 13:01:00 +0300
Subject: [PATCH 423/600] xmlpool.h: fix a typo

---
 src/mesa/drivers/dri/common/xmlpool.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/common/xmlpool.h b/src/mesa/drivers/dri/common/xmlpool.h
index 587517ea10a..ffea430024d 100644
--- a/src/mesa/drivers/dri/common/xmlpool.h
+++ b/src/mesa/drivers/dri/common/xmlpool.h
@@ -60,7 +60,7 @@
 #define DRI_CONF_OPT_BEGIN(name,type,def) \
 "<option name=\""#name"\" type=\""#type"\" default=\""#def"\">\n"
 
-/** \brief Begin an option definition with qouted default value */
+/** \brief Begin an option definition with quoted default value */
 #define DRI_CONF_OPT_BEGIN_Q(name,type,def) \
 "<option name=\""#name"\" type=\""#type"\" default="#def">\n"
 

From 65bdb878a0afcdffe51716ae4533a04990c7d556 Mon Sep 17 00:00:00 2001
From: Lauri Kasanen <cand@gmx.com>
Date: Fri, 1 Jul 2011 12:57:06 +0300
Subject: [PATCH 424/600] st/dri: Indent driconf options

---
 .../state_trackers/dri/common/dri_screen.c     | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/gallium/state_trackers/dri/common/dri_screen.c b/src/gallium/state_trackers/dri/common/dri_screen.c
index 5931df993b0..c9647945d6e 100644
--- a/src/gallium/state_trackers/dri/common/dri_screen.c
+++ b/src/gallium/state_trackers/dri/common/dri_screen.c
@@ -42,13 +42,17 @@
 #include "util/u_debug.h"
 
 PUBLIC const char __driConfigOptions[] =
-   DRI_CONF_BEGIN DRI_CONF_SECTION_PERFORMANCE
-   DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS)
-   DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
-   DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY
-/* DRI_CONF_FORCE_S3TC_ENABLE(false) */
-   DRI_CONF_ALLOW_LARGE_TEXTURES(1)
-   DRI_CONF_SECTION_END DRI_CONF_END;
+   DRI_CONF_BEGIN
+      DRI_CONF_SECTION_PERFORMANCE
+         DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS)
+         DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
+      DRI_CONF_SECTION_END
+
+      DRI_CONF_SECTION_QUALITY
+/*       DRI_CONF_FORCE_S3TC_ENABLE(false) */
+         DRI_CONF_ALLOW_LARGE_TEXTURES(1)
+      DRI_CONF_SECTION_END
+   DRI_CONF_END;
 
 static const uint __driNConfigOptions = 3;
 

From 718b894dbb585af52dd24defb2e8c130216e5485 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 16 Aug 2011 16:30:52 -0600
Subject: [PATCH 425/600] st/mesa: fix incorrect loop over instruction src regs

The array of src regs is of size 3, not 4.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index aef23e7d207..7b90c812595 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3443,7 +3443,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
          /* Continuing the block, clear any channels from the write array that
           * are read by this instruction.
           */
-         for (int i = 0; i < 4; i++) {
+         for (unsigned i = 0; i < Elements(inst->src); i++) {
             if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
                /* Any temporary might be read, so no dead code elimination 
                 * across this instruction.

From 34980cd153050111edc545545ddff11f5b68347e Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Wed, 17 Aug 2011 20:38:40 +0200
Subject: [PATCH 426/600] nv50: don't drop flags definition when merging SAT
 with ADD/MAD

---
 src/gallium/drivers/nv50/nv50_pc_optimize.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c
index d72b23c137a..a1590c2dc6b 100644
--- a/src/gallium/drivers/nv50/nv50_pc_optimize.c
+++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c
@@ -462,6 +462,8 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
          mi->saturate = 1;
          mi->def[0] = nvi->def[0];
          mi->def[0]->insn = mi;
+         mi->flags_def = nvi->flags_def;
+         mi->flags_def->insn = mi;
          nv_nvi_delete(nvi);
       }
    }

From 3f78f719732b87e6707f94c187ad6e263c6c2ef0 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 15 Aug 2011 22:36:18 -0700
Subject: [PATCH 427/600] i965/fs: Fix 32-bit integer multiplication.

The MUL opcode does a 16bit * 32bit multiply, and we need to do the
MACH to get the top 16bit * 32bit added in.

Fixes fs-op-mult-int-*, fs-op-mult-ivec*

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp    |  5 +++++
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 18 +++++++++++++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 7367ccaa7e0..482d250c333 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -630,6 +630,11 @@ fs_visitor::generate_code()
       case BRW_OPCODE_MUL:
 	 brw_MUL(p, dst, src[0], src[1]);
 	 break;
+      case BRW_OPCODE_MACH:
+	 brw_set_acc_write_control(p, 1);
+	 brw_MACH(p, dst, src[0], src[1]);
+	 brw_set_acc_write_control(p, 0);
+	 break;
 
       case BRW_OPCODE_FRC:
 	 brw_FRC(p, dst, src[0]);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 8b4f5bbac15..2dc9132cec6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -287,7 +287,23 @@ fs_visitor::visit(ir_expression *ir)
       break;
 
    case ir_binop_mul:
-      emit(BRW_OPCODE_MUL, this->result, op[0], op[1]);
+      if (ir->type->is_integer()) {
+	 /* For integer multiplication, the MUL uses the low 16 bits
+	  * of one of the operands (src0 on gen6, src1 on gen7).  The
+	  * MACH accumulates in the contribution of the upper 16 bits
+	  * of that operand.
+	  *
+	  * FINISHME: Emit just the MUL if we know an operand is small
+	  * enough.
+	  */
+	 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
+
+	 emit(BRW_OPCODE_MUL, acc, op[0], op[1]);
+	 emit(BRW_OPCODE_MACH, reg_null_d, op[0], op[1]);
+	 emit(BRW_OPCODE_MOV, this->result, fs_reg(acc));
+      } else {
+	 emit(BRW_OPCODE_MUL, this->result, op[0], op[1]);
+      }
       break;
    case ir_binop_div:
       assert(!"not reached: should be handled by ir_div_to_mul_rcp");

From 17867f06b173b90ba0e1b9137514b4dcce415822 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Wed, 17 Aug 2011 22:53:10 +0200
Subject: [PATCH 428/600] r600g: fix scons build

---
 src/gallium/targets/dri-r600/SConscript | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/targets/dri-r600/SConscript b/src/gallium/targets/dri-r600/SConscript
index 1df11a8747b..c10d65b595a 100644
--- a/src/gallium/targets/dri-r600/SConscript
+++ b/src/gallium/targets/dri-r600/SConscript
@@ -6,6 +6,7 @@ env.Append(CPPDEFINES = ['GALLIUM_RBUG', 'GALLIUM_TRACE'])
 
 env.Prepend(LIBS = [
     st_dri,
+    radeonwinsys,
     r600winsys,
     r600,
     trace,

From 01680ce2f350f44073e8f1adf3b36d48424d21fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Tue, 16 Aug 2011 09:47:16 +0200
Subject: [PATCH 429/600] r600g: implement NV_primitive_restart functionality
 (v2)

Needed for GL3.

v2: evergreen support

I don't set PA_SU_SC_MODE_CNTL.MULTI_PRIM_IB_ENA.
piglit/primitive-restart does pass though. Tested on RV730 and EG-REDWOOD.
---
 src/gallium/drivers/r600/evergreend.h              | 4 ++++
 src/gallium/drivers/r600/r600_pipe.c               | 2 +-
 src/gallium/drivers/r600/r600_state_common.c       | 4 ++++
 src/gallium/winsys/r600/drm/evergreen_hw_context.c | 2 ++
 4 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index 96dbd4da91b..9a8c353e4ee 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -1697,6 +1697,10 @@
 #define R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL           0x00028A3C
 #define R_028A48_PA_SC_MODE_CNTL_0                   0x00028A48
 #define R_028A4C_PA_SC_MODE_CNTL_1                   0x00028A4C
+#define R_028A94_VGT_MULTI_PRIM_IB_RESET_EN          0x00028A94
+#define   S_028A94_RESET_EN(x)                         (((x) & 0x1) << 0)
+#define   G_028A94_RESET_EN(x)                         (((x) >> 0) & 0x1)
+#define   C_028A94_RESET_EN                            0xFFFFFFFE
 #define R_028AB4_VGT_REUSE_OFF                       0x00028AB4
 #define R_028AB8_VGT_VTX_CNT_EN                      0x00028AB8
 #define R_028ABC_DB_HTILE_SURFACE                    0x00028ABC
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 4cf02c9b18e..d180e36aa16 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -360,6 +360,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_SM3:
 	case PIPE_CAP_SEAMLESS_CUBE_MAP:
 	case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL:
+	case PIPE_CAP_PRIMITIVE_RESTART:
 		return 1;
 
 	/* Supported except the original R600. */
@@ -374,7 +375,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 
 	/* Unsupported features. */
 	case PIPE_CAP_STREAM_OUTPUT:
-	case PIPE_CAP_PRIMITIVE_RESTART:
 	case PIPE_CAP_TGSI_INSTANCEID:
 	case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
 	case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 53a1313a2a8..853458f0156 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -619,6 +619,8 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		r600_pipe_state_add_reg(&rctx->vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&rctx->vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&rctx->vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(&rctx->vgt, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, draw.info.restart_index, 0xFFFFFFFF, NULL, 0);
+		r600_pipe_state_add_reg(&rctx->vgt, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, draw.info.primitive_restart, 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&rctx->vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&rctx->vgt, R_03CFF4_SQ_VTX_START_INST_LOC, draw.info.start_instance, 0xFFFFFFFF, NULL, 0);
 		r600_pipe_state_add_reg(&rctx->vgt, R_028814_PA_SU_SC_MODE_CNTL,
@@ -633,6 +635,8 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	r600_pipe_state_mod_reg(&rctx->vgt, draw.info.max_index);
 	r600_pipe_state_mod_reg(&rctx->vgt, draw.info.min_index);
 	r600_pipe_state_mod_reg(&rctx->vgt, draw.info.index_bias);
+	r600_pipe_state_mod_reg(&rctx->vgt, draw.info.restart_index);
+	r600_pipe_state_mod_reg(&rctx->vgt, draw.info.primitive_restart);
 	r600_pipe_state_mod_reg(&rctx->vgt, 0);
 	r600_pipe_state_mod_reg(&rctx->vgt, draw.info.start_instance);
 	if (draw.info.mode == PIPE_PRIM_QUADS || draw.info.mode == PIPE_PRIM_QUAD_STRIP || draw.info.mode == PIPE_PRIM_POLYGON) {
diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
index 30bb0b8223c..3417eb39192 100644
--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -159,6 +159,7 @@ static const struct r600_reg evergreen_context_reg_list[] = {
 	{R_028404_VGT_MIN_VTX_INDX, 0, 0, 0},
 	{R_028408_VGT_INDX_OFFSET, 0, 0, 0},
 	{R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0, 0, 0},
+	{R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0, 0, 0},
 	{GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
 	{R_028410_SX_ALPHA_TEST_CONTROL, 0, 0, 0},
 	{R_028414_CB_BLEND_RED, 0, 0, 0},
@@ -523,6 +524,7 @@ static const struct r600_reg cayman_context_reg_list[] = {
 	{R_028404_VGT_MIN_VTX_INDX, 0, 0, 0},
 	{R_028408_VGT_INDX_OFFSET, 0, 0, 0},
 	{R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0, 0, 0},
+	{R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 0, 0, 0},
 	{GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
 	{R_028410_SX_ALPHA_TEST_CONTROL, 0, 0, 0},
 	{R_028414_CB_BLEND_RED, 0, 0, 0},

From 4a7667b96b7bd7cdffbe929182c15935b74facd2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kristian=20H=C3=B8gsberg?= <krh@bitplanet.net>
Date: Thu, 18 Aug 2011 08:46:02 -0400
Subject: [PATCH 430/600] glx: Don't flush twice if we fallback to
 dri2CopySubBuffer

The flush extensions flush call indicates end of frame and should only
be called once per frame.  However, in the dri2SwapBuffer fallback
path, we call flush and then call dri2CopySubBuffer, which also calls
flush.  Refactor the code to only call flush once.
---
 src/glx/dri2_glx.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/src/glx/dri2_glx.c b/src/glx/dri2_glx.c
index d9524d765bd..9fa0d5ad362 100644
--- a/src/glx/dri2_glx.c
+++ b/src/glx/dri2_glx.c
@@ -543,6 +543,11 @@ dri2SwapBuffers(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor,
 	(struct dri2_display *)dpyPriv->dri2Display;
     CARD64 ret = 0;
 
+    /* Old servers can't handle swapbuffers */
+    if (!pdp->swapAvailable) {
+       dri2CopySubBuffer(pdraw, 0, 0, priv->width, priv->height);
+    } else {
+#ifdef X_DRI2SwapBuffers
 #ifdef __DRI2_FLUSH
     if (psc->f) {
        struct glx_context *gc = __glXGetCurrentContext();
@@ -553,21 +558,15 @@ dri2SwapBuffers(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor,
     }
 #endif
 
+       DRI2SwapBuffers(psc->base.dpy, pdraw->xDrawable,
+		       target_msc, divisor, remainder, &ret);
+#endif
+    }
+
     /* Old servers don't send invalidate events */
     if (!pdp->invalidateAvailable)
        dri2InvalidateBuffers(dpyPriv->dpy, pdraw->xDrawable);
 
-    /* Old servers can't handle swapbuffers */
-    if (!pdp->swapAvailable) {
-       dri2CopySubBuffer(pdraw, 0, 0, priv->width, priv->height);
-       return 0;
-    }
-
-#ifdef X_DRI2SwapBuffers
-    DRI2SwapBuffers(psc->base.dpy, pdraw->xDrawable, target_msc, divisor,
-		    remainder, &ret);
-#endif
-
     return ret;
 }
 

From 7be4cf9c63d3dcacce8a6bf47c471a9aab09b1d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= <jfonseca@vmware.com>
Date: Thu, 18 Aug 2011 16:00:03 +0100
Subject: [PATCH 431/600] scons: Add support for LLVM-2.9 on Windows.

MinGW & MSVC, although I've only tested the former.
---
 scons/llvm.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/scons/llvm.py b/scons/llvm.py
index 66f972df5fb..c8d2d372e4a 100644
--- a/scons/llvm.py
+++ b/scons/llvm.py
@@ -92,7 +92,21 @@ def generate(env):
             'HAVE_STDINT_H',
         ])
         env.Prepend(LIBPATH = [os.path.join(llvm_dir, 'lib')])
-        if llvm_version >= distutils.version.LooseVersion('2.7'):
+        if llvm_version >= distutils.version.LooseVersion('2.9'):
+            # 2.9
+            env.Prepend(LIBS = [
+                'LLVMObject', 'LLVMMCJIT', 'LLVMMCDisassembler',
+                'LLVMLinker', 'LLVMipo', 'LLVMInterpreter',
+                'LLVMInstrumentation', 'LLVMJIT', 'LLVMExecutionEngine',
+                'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser',
+                'LLVMMCParser', 'LLVMX86AsmPrinter', 'LLVMX86CodeGen',
+                'LLVMSelectionDAG', 'LLVMX86Utils', 'LLVMX86Info', 'LLVMAsmPrinter',
+                'LLVMCodeGen', 'LLVMScalarOpts', 'LLVMInstCombine',
+                'LLVMTransformUtils', 'LLVMipa', 'LLVMAsmParser',
+                'LLVMArchive', 'LLVMBitReader', 'LLVMAnalysis', 'LLVMTarget',
+                'LLVMCore', 'LLVMMC', 'LLVMSupport',
+            ])
+        elif llvm_version >= distutils.version.LooseVersion('2.7'):
             # 2.7
             env.Prepend(LIBS = [
                 'LLVMLinker', 'LLVMipo', 'LLVMInterpreter',

From 09042e08cbdfe61bb860a9d62151e136e6e278a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= <jfonseca@vmware.com>
Date: Thu, 18 Aug 2011 16:06:00 +0100
Subject: [PATCH 432/600] llvmpipe: Add u_math.h include.

Necessary on platforms with incomplete math.h
---
 src/gallium/drivers/llvmpipe/lp_test_arit.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/llvmpipe/lp_test_arit.c b/src/gallium/drivers/llvmpipe/lp_test_arit.c
index f0e43e0f9cc..ea2a659142f 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_arit.c
@@ -32,6 +32,7 @@
 
 #include "util/u_pointer.h"
 #include "util/u_memory.h"
+#include "util/u_math.h"
 
 #include "gallivm/lp_bld.h"
 #include "gallivm/lp_bld_init.h"

From a7f67b1c50d5d835c371d9aeaeca681fbd354ef8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= <jfonseca@vmware.com>
Date: Thu, 18 Aug 2011 17:58:02 +0100
Subject: [PATCH 433/600] llvmpipe: snprintf->util_snprintf.

For MSVC.
---
 src/gallium/drivers/llvmpipe/lp_screen.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index e3f8c19679f..9e2a45caad6 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -30,6 +30,7 @@
 #include "util/u_math.h"
 #include "util/u_cpu_detect.h"
 #include "util/u_format.h"
+#include "util/u_string.h"
 #include "util/u_format_s3tc.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
@@ -94,7 +95,7 @@ static const char *
 llvmpipe_get_name(struct pipe_screen *screen)
 {
    static char buf[100];
-   snprintf(buf, sizeof(buf), "llvmpipe (LLVM 0x%x)", HAVE_LLVM);
+   util_snprintf(buf, sizeof(buf), "llvmpipe (LLVM 0x%x)", HAVE_LLVM);
    return buf;
 }
 

From 762bf931ca25a6aea62c1344ea2360496479f781 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= <jfonseca@vmware.com>
Date: Thu, 18 Aug 2011 18:04:44 +0100
Subject: [PATCH 434/600] llvmpipe: Don't build lp_test_arit on MSVC.

Several issues due to expf/logf/etc either not being declared,
or being defined as a macro.
---
 src/gallium/drivers/llvmpipe/SConscript | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 2b232a524ae..129de297824 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -85,7 +85,6 @@ if not env['embedded']:
     env.Prepend(LIBS = [llvmpipe] + gallium)
 
     tests = [
-        'arit',
         'format',
         'blend',
         'conv',
@@ -94,6 +93,7 @@ if not env['embedded']:
     ]
 
     if not env['msvc']:
+        tests.append('arit')
         tests.append('round')
 
     for test in tests:

From e9ae4cadf5008f25aeae1bf4fa0c9c3397ee8c18 Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Tue, 16 Aug 2011 10:23:52 -0700
Subject: [PATCH 435/600] glapi: update .gitignore for generated ES dispatch
 headers

Commit 6eff33dc (glapi: generate ES dispatch headers from core mesa)
replaced the autogenerated files
src/mapi/es1api/main/{dispatch,remap_helper}.h with new autogenerated
files src/mesa/main/api_exec_es{1,2}_{dispatch,remap_helper}.h.  This
patch updates the .gitignore files to properly ignore the new
autogenerated files, and stop ignoring the old autogenerated files.

Reviewed-by: Chia-I Wu <olv@lunarg.com>
---
 src/mapi/es1api/.gitignore | 3 ---
 src/mesa/main/.gitignore   | 4 ++++
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/mapi/es1api/.gitignore b/src/mapi/es1api/.gitignore
index b21f1d14c6f..dfe465677c4 100644
--- a/src/mapi/es1api/.gitignore
+++ b/src/mapi/es1api/.gitignore
@@ -1,4 +1 @@
 glapi_mapi_tmp.h
-glapi-stamp
-glapi
-main
diff --git a/src/mesa/main/.gitignore b/src/mesa/main/.gitignore
index 2575f44df4a..d0744e3f0d7 100644
--- a/src/mesa/main/.gitignore
+++ b/src/mesa/main/.gitignore
@@ -4,3 +4,7 @@ get_es1.c
 get_es2.c
 git_sha1.h
 git_sha1.h.tmp
+api_exec_es1_dispatch.h
+api_exec_es1_remap_helper.h
+api_exec_es2_dispatch.h
+api_exec_es2_remap_helper.h

From ccecc08f79afc020c8c0acface04a8e53e3a7c32 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 18 Aug 2011 11:51:48 -0600
Subject: [PATCH 436/600] mesa: fix incorrect error code in
 _mesa_FramebufferTexture1D/3DEXT()

The spec says GL_INVALID_OPERATION is generated when texture!=0 and
textarget is not a legal value.  We had this right for the 2D function.
---
 src/mesa/main/fbobject.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 82eb7fb718d..7646f9bdb03 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -1985,7 +1985,7 @@ _mesa_FramebufferTexture1DEXT(GLenum target, GLenum attachment,
    GET_CURRENT_CONTEXT(ctx);
 
    if ((texture != 0) && (textarget != GL_TEXTURE_1D)) {
-      _mesa_error(ctx, GL_INVALID_ENUM,
+      _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glFramebufferTexture1DEXT(textarget)");
       return;
    }
@@ -2023,7 +2023,7 @@ _mesa_FramebufferTexture3DEXT(GLenum target, GLenum attachment,
    GET_CURRENT_CONTEXT(ctx);
 
    if ((texture != 0) && (textarget != GL_TEXTURE_3D)) {
-      _mesa_error(ctx, GL_INVALID_ENUM,
+      _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glFramebufferTexture3DEXT(textarget)");
       return;
    }

From e330d90e919cebd64f720ccfb5dca94c2ded4e4c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Thu, 18 Aug 2011 22:59:22 +0200
Subject: [PATCH 437/600] r600g: fix a possible crash in r600_adjust_gprs

---
 src/gallium/drivers/r600/r600_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 0757eab2ea7..19bef31468d 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1676,7 +1676,7 @@ void r600_adjust_gprs(struct r600_pipe_context *rctx)
 	if (rctx->chip_class >= EVERGREEN)
 		return;
 
-	if (!rctx->ps_shader && !rctx->vs_shader)
+	if (!rctx->ps_shader || !rctx->vs_shader)
 		return;
 
 	if (rctx->ps_shader->shader.bc.ngpr > rctx->default_ps_gprs)

From e975e18bebb54b20f00c568e7870932402b79f8f Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 19 Aug 2011 08:36:22 -0600
Subject: [PATCH 438/600] glx: use a block to fix declarations after code
 warning

---
 src/glx/glxcmds.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c
index fc0a07901a7..c8ec9c21fed 100644
--- a/src/glx/glxcmds.c
+++ b/src/glx/glxcmds.c
@@ -794,15 +794,17 @@ glXSwapBuffers(Display * dpy, GLXDrawable drawable)
    gc = __glXGetCurrentContext();
 
 #if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL)
-   __GLXDRIdrawable *pdraw = GetGLXDRIDrawable(dpy, drawable);
+   {
+      __GLXDRIdrawable *pdraw = GetGLXDRIDrawable(dpy, drawable);
 
-   if (pdraw != NULL) {
-      if (gc && drawable == gc->currentDrawable) {
-	 glFlush();
+      if (pdraw != NULL) {
+         if (gc && drawable == gc->currentDrawable) {
+            glFlush();
+         }
+
+         (*pdraw->psc->driScreen->swapBuffers)(pdraw, 0, 0, 0);
+         return;
       }
-
-      (*pdraw->psc->driScreen->swapBuffers)(pdraw, 0, 0, 0);
-      return;
    }
 #endif
 

From 778997f9dfeffbcbeef52f7dfa93a9ebd40ff42f Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Fri, 19 Aug 2011 16:57:30 +0200
Subject: [PATCH 439/600] nv50: fix potential null deref in insn modifer
 optimization pass

Bug introduced in 34980cd153050111edc545545ddff11f5b68347e.
---
 src/gallium/drivers/nv50/nv50_pc_optimize.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c
index a1590c2dc6b..4271731efa7 100644
--- a/src/gallium/drivers/nv50/nv50_pc_optimize.c
+++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c
@@ -462,8 +462,10 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
          mi->saturate = 1;
          mi->def[0] = nvi->def[0];
          mi->def[0]->insn = mi;
-         mi->flags_def = nvi->flags_def;
-         mi->flags_def->insn = mi;
+         if (nvi->flags_def) {
+            mi->flags_def = nvi->flags_def;
+            mi->flags_def->insn = mi;
+         }
          nv_nvi_delete(nvi);
       }
    }

From f23c3ebeccc5c591b79c10cbdb693270ef27a2f5 Mon Sep 17 00:00:00 2001
From: Chad Versace <chad@chad-versace.us>
Date: Fri, 12 Aug 2011 16:50:27 -0700
Subject: [PATCH 440/600] mesa: Declare _mesa_meta_begin()/end() as public

Declare _mesa_meta_begin()/end() in meta.h so that drivers can write
custom meta-ops (such as HiZ resolves for i965).

This necessitates moving the the META_* macros into meta.h. To prevent
naming collisions, this commit renames each macro to be MESA_META_*.

Reviewed-by: Brian Paul <brianp@vmware.com>
Signed-off-by: Chad Versace <chad@chad-versace.us>
---
 src/mesa/drivers/common/meta.c | 256 +++++++++++++++------------------
 src/mesa/drivers/common/meta.h |  33 +++++
 2 files changed, 147 insertions(+), 142 deletions(-)

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index f9b4755988b..e37b78aae78 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -73,64 +73,36 @@
 /** Return offset in bytes of the field within a vertex struct */
 #define OFFSET(FIELD) ((void *) offsetof(struct vertex, FIELD))
 
-
-/**
- * Flags passed to _mesa_meta_begin().
- */
-/*@{*/
-#define META_ALL              ~0x0
-#define META_ALPHA_TEST        0x1
-#define META_BLEND             0x2  /**< includes logicop */
-#define META_COLOR_MASK        0x4
-#define META_DEPTH_TEST        0x8
-#define META_FOG              0x10
-#define META_PIXEL_STORE      0x20
-#define META_PIXEL_TRANSFER   0x40
-#define META_RASTERIZATION    0x80
-#define META_SCISSOR         0x100
-#define META_SHADER          0x200
-#define META_STENCIL_TEST    0x400
-#define META_TRANSFORM       0x800 /**< modelview/projection matrix state */
-#define META_TEXTURE        0x1000
-#define META_VERTEX         0x2000
-#define META_VIEWPORT       0x4000
-#define META_CLAMP_FRAGMENT_COLOR 0x8000
-#define META_CLAMP_VERTEX_COLOR 0x10000
-#define META_CONDITIONAL_RENDER 0x20000
-#define META_CLIP          0x40000
-/*@}*/
-
-
 /**
  * State which we may save/restore across meta ops.
  * XXX this may be incomplete...
  */
 struct save_state
 {
-   GLbitfield SavedState;  /**< bitmask of META_* flags */
+   GLbitfield SavedState;  /**< bitmask of MESA_META_* flags */
 
-   /** META_ALPHA_TEST */
+   /** MESA_META_ALPHA_TEST */
    GLboolean AlphaEnabled;
    GLenum AlphaFunc;
    GLclampf AlphaRef;
 
-   /** META_BLEND */
+   /** MESA_META_BLEND */
    GLbitfield BlendEnabled;
    GLboolean ColorLogicOpEnabled;
 
-   /** META_COLOR_MASK */
+   /** MESA_META_COLOR_MASK */
    GLubyte ColorMask[MAX_DRAW_BUFFERS][4];
 
-   /** META_DEPTH_TEST */
+   /** MESA_META_DEPTH_TEST */
    struct gl_depthbuffer_attrib Depth;
 
-   /** META_FOG */
+   /** MESA_META_FOG */
    GLboolean Fog;
 
-   /** META_PIXEL_STORE */
+   /** MESA_META_PIXEL_STORE */
    struct gl_pixelstore_attrib Pack, Unpack;
 
-   /** META_PIXEL_TRANSFER */
+   /** MESA_META_PIXEL_TRANSFER */
    GLfloat RedBias, RedScale;
    GLfloat GreenBias, GreenScale;
    GLfloat BlueBias, BlueScale;
@@ -138,17 +110,17 @@ struct save_state
    GLfloat DepthBias, DepthScale;
    GLboolean MapColorFlag;
 
-   /** META_RASTERIZATION */
+   /** MESA_META_RASTERIZATION */
    GLenum FrontPolygonMode, BackPolygonMode;
    GLboolean PolygonOffset;
    GLboolean PolygonSmooth;
    GLboolean PolygonStipple;
    GLboolean PolygonCull;
 
-   /** META_SCISSOR */
+   /** MESA_META_SCISSOR */
    struct gl_scissor_attrib Scissor;
 
-   /** META_SHADER */
+   /** MESA_META_SHADER */
    GLboolean VertexProgramEnabled;
    struct gl_vertex_program *VertexProgram;
    GLboolean FragmentProgramEnabled;
@@ -158,19 +130,19 @@ struct save_state
    struct gl_shader_program *FragmentShader;
    struct gl_shader_program *ActiveShader;
 
-   /** META_STENCIL_TEST */
+   /** MESA_META_STENCIL_TEST */
    struct gl_stencil_attrib Stencil;
 
-   /** META_TRANSFORM */
+   /** MESA_META_TRANSFORM */
    GLenum MatrixMode;
    GLfloat ModelviewMatrix[16];
    GLfloat ProjectionMatrix[16];
    GLfloat TextureMatrix[16];
 
-   /** META_CLIP */
+   /** MESA_META_CLIP */
    GLbitfield ClipPlanesEnabled;
 
-   /** META_TEXTURE */
+   /** MESA_META_TEXTURE */
    GLuint ActiveUnit;
    GLuint ClientActiveUnit;
    /** for unit[0] only */
@@ -180,21 +152,21 @@ struct save_state
    GLbitfield TexGenEnabled[MAX_TEXTURE_UNITS];
    GLuint EnvMode;  /* unit[0] only */
 
-   /** META_VERTEX */
+   /** MESA_META_VERTEX */
    struct gl_array_object *ArrayObj;
    struct gl_buffer_object *ArrayBufferObj;
 
-   /** META_VIEWPORT */
+   /** MESA_META_VIEWPORT */
    GLint ViewportX, ViewportY, ViewportW, ViewportH;
    GLclampd DepthNear, DepthFar;
 
-   /** META_CLAMP_FRAGMENT_COLOR */
+   /** MESA_META_CLAMP_FRAGMENT_COLOR */
    GLenum ClampFragmentColor;
 
-   /** META_CLAMP_VERTEX_COLOR */
+   /** MESA_META_CLAMP_VERTEX_COLOR */
    GLenum ClampVertexColor;
 
-   /** META_CONDITIONAL_RENDER */
+   /** MESA_META_CONDITIONAL_RENDER */
    struct gl_query_object *CondRenderQuery;
    GLenum CondRenderMode;
 
@@ -342,10 +314,10 @@ _mesa_meta_free(struct gl_context *ctx)
  * Enter meta state.  This is like a light-weight version of glPushAttrib
  * but it also resets most GL state back to default values.
  *
- * \param state  bitmask of META_* flags indicating which attribute groups
+ * \param state  bitmask of MESA_META_* flags indicating which attribute groups
  *               to save and reset to their defaults
  */
-static void
+void
 _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
 {
    struct save_state *save;
@@ -357,7 +329,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
    memset(save, 0, sizeof(*save));
    save->SavedState = state;
 
-   if (state & META_ALPHA_TEST) {
+   if (state & MESA_META_ALPHA_TEST) {
       save->AlphaEnabled = ctx->Color.AlphaEnabled;
       save->AlphaFunc = ctx->Color.AlphaFunc;
       save->AlphaRef = ctx->Color.AlphaRef;
@@ -365,7 +337,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
          _mesa_set_enable(ctx, GL_ALPHA_TEST, GL_FALSE);
    }
 
-   if (state & META_BLEND) {
+   if (state & MESA_META_BLEND) {
       save->BlendEnabled = ctx->Color.BlendEnabled;
       if (ctx->Color.BlendEnabled) {
          if (ctx->Extensions.EXT_draw_buffers2) {
@@ -383,7 +355,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
          _mesa_set_enable(ctx, GL_COLOR_LOGIC_OP, GL_FALSE);
    }
 
-   if (state & META_COLOR_MASK) {
+   if (state & MESA_META_COLOR_MASK) {
       memcpy(save->ColorMask, ctx->Color.ColorMask,
              sizeof(ctx->Color.ColorMask));
       if (!ctx->Color.ColorMask[0][0] ||
@@ -393,26 +365,26 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
          _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
    }
 
-   if (state & META_DEPTH_TEST) {
+   if (state & MESA_META_DEPTH_TEST) {
       save->Depth = ctx->Depth; /* struct copy */
       if (ctx->Depth.Test)
          _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_FALSE);
    }
 
-   if (state & META_FOG) {
+   if (state & MESA_META_FOG) {
       save->Fog = ctx->Fog.Enabled;
       if (ctx->Fog.Enabled)
          _mesa_set_enable(ctx, GL_FOG, GL_FALSE);
    }
 
-   if (state & META_PIXEL_STORE) {
+   if (state & MESA_META_PIXEL_STORE) {
       save->Pack = ctx->Pack;
       save->Unpack = ctx->Unpack;
       ctx->Pack = ctx->DefaultPacking;
       ctx->Unpack = ctx->DefaultPacking;
    }
 
-   if (state & META_PIXEL_TRANSFER) {
+   if (state & MESA_META_PIXEL_TRANSFER) {
       save->RedScale = ctx->Pixel.RedScale;
       save->RedBias = ctx->Pixel.RedBias;
       save->GreenScale = ctx->Pixel.GreenScale;
@@ -435,7 +407,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
       ctx->NewState |=_NEW_PIXEL;
    }
 
-   if (state & META_RASTERIZATION) {
+   if (state & MESA_META_RASTERIZATION) {
       save->FrontPolygonMode = ctx->Polygon.FrontMode;
       save->BackPolygonMode = ctx->Polygon.BackMode;
       save->PolygonOffset = ctx->Polygon.OffsetFill;
@@ -449,12 +421,12 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
       _mesa_set_enable(ctx, GL_CULL_FACE, GL_FALSE);
    }
 
-   if (state & META_SCISSOR) {
+   if (state & MESA_META_SCISSOR) {
       save->Scissor = ctx->Scissor; /* struct copy */
       _mesa_set_enable(ctx, GL_SCISSOR_TEST, GL_FALSE);
    }
 
-   if (state & META_SHADER) {
+   if (state & MESA_META_SHADER) {
       if (ctx->Extensions.ARB_vertex_program) {
          save->VertexProgramEnabled = ctx->VertexProgram.Enabled;
          _mesa_reference_vertprog(ctx, &save->VertexProgram,
@@ -483,14 +455,14 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
       }
    }
 
-   if (state & META_STENCIL_TEST) {
+   if (state & MESA_META_STENCIL_TEST) {
       save->Stencil = ctx->Stencil; /* struct copy */
       if (ctx->Stencil.Enabled)
          _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_FALSE);
       /* NOTE: other stencil state not reset */
    }
 
-   if (state & META_TEXTURE) {
+   if (state & MESA_META_TEXTURE) {
       GLuint u, tgt;
 
       save->ActiveUnit = ctx->Texture.CurrentUnit;
@@ -529,7 +501,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
       _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
    }
 
-   if (state & META_TRANSFORM) {
+   if (state & MESA_META_TRANSFORM) {
       GLuint activeTexture = ctx->Texture.CurrentUnit;
       memcpy(save->ModelviewMatrix, ctx->ModelviewMatrixStack.Top->m,
              16 * sizeof(GLfloat));
@@ -552,7 +524,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
                   -1.0, 1.0);
    }
 
-   if (state & META_CLIP) {
+   if (state & MESA_META_CLIP) {
       save->ClipPlanesEnabled = ctx->Transform.ClipPlanesEnabled;
       if (ctx->Transform.ClipPlanesEnabled) {
          GLuint i;
@@ -562,7 +534,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
       }
    }
 
-   if (state & META_VERTEX) {
+   if (state & MESA_META_VERTEX) {
       /* save vertex array object state */
       _mesa_reference_array_object(ctx, &save->ArrayObj,
                                    ctx->Array.ArrayObj);
@@ -571,7 +543,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
       /* set some default state? */
    }
 
-   if (state & META_VIEWPORT) {
+   if (state & MESA_META_VIEWPORT) {
       /* save viewport state */
       save->ViewportX = ctx->Viewport.X;
       save->ViewportY = ctx->Viewport.Y;
@@ -592,7 +564,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
       _mesa_DepthRange(0.0, 1.0);
    }
 
-   if (state & META_CLAMP_FRAGMENT_COLOR) {
+   if (state & MESA_META_CLAMP_FRAGMENT_COLOR) {
       save->ClampFragmentColor = ctx->Color.ClampFragmentColor;
 
       /* Generally in here we want to do clamping according to whether
@@ -603,7 +575,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
 	 _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE);
    }
 
-   if (state & META_CLAMP_VERTEX_COLOR) {
+   if (state & MESA_META_CLAMP_VERTEX_COLOR) {
       save->ClampVertexColor = ctx->Light.ClampVertexColor;
 
       /* Generally in here we never want vertex color clamping --
@@ -612,7 +584,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
       _mesa_ClampColorARB(GL_CLAMP_VERTEX_COLOR, GL_FALSE);
    }
 
-   if (state & META_CONDITIONAL_RENDER) {
+   if (state & MESA_META_CONDITIONAL_RENDER) {
       save->CondRenderQuery = ctx->Query.CondRenderQuery;
       save->CondRenderMode = ctx->Query.CondRenderMode;
 
@@ -632,19 +604,19 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
 /**
  * Leave meta state.  This is like a light-weight version of glPopAttrib().
  */
-static void
+void
 _mesa_meta_end(struct gl_context *ctx)
 {
    struct save_state *save = &ctx->Meta->Save[--ctx->Meta->SaveStackDepth];
    const GLbitfield state = save->SavedState;
 
-   if (state & META_ALPHA_TEST) {
+   if (state & MESA_META_ALPHA_TEST) {
       if (ctx->Color.AlphaEnabled != save->AlphaEnabled)
          _mesa_set_enable(ctx, GL_ALPHA_TEST, save->AlphaEnabled);
       _mesa_AlphaFunc(save->AlphaFunc, save->AlphaRef);
    }
 
-   if (state & META_BLEND) {
+   if (state & MESA_META_BLEND) {
       if (ctx->Color.BlendEnabled != save->BlendEnabled) {
          if (ctx->Extensions.EXT_draw_buffers2) {
             GLuint i;
@@ -660,7 +632,7 @@ _mesa_meta_end(struct gl_context *ctx)
          _mesa_set_enable(ctx, GL_COLOR_LOGIC_OP, save->ColorLogicOpEnabled);
    }
 
-   if (state & META_COLOR_MASK) {
+   if (state & MESA_META_COLOR_MASK) {
       GLuint i;
       for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) {
          if (!TEST_EQ_4V(ctx->Color.ColorMask[i], save->ColorMask[i])) {
@@ -679,23 +651,23 @@ _mesa_meta_end(struct gl_context *ctx)
       }
    }
 
-   if (state & META_DEPTH_TEST) {
+   if (state & MESA_META_DEPTH_TEST) {
       if (ctx->Depth.Test != save->Depth.Test)
          _mesa_set_enable(ctx, GL_DEPTH_TEST, save->Depth.Test);
       _mesa_DepthFunc(save->Depth.Func);
       _mesa_DepthMask(save->Depth.Mask);
    }
 
-   if (state & META_FOG) {
+   if (state & MESA_META_FOG) {
       _mesa_set_enable(ctx, GL_FOG, save->Fog);
    }
 
-   if (state & META_PIXEL_STORE) {
+   if (state & MESA_META_PIXEL_STORE) {
       ctx->Pack = save->Pack;
       ctx->Unpack = save->Unpack;
    }
 
-   if (state & META_PIXEL_TRANSFER) {
+   if (state & MESA_META_PIXEL_TRANSFER) {
       ctx->Pixel.RedScale = save->RedScale;
       ctx->Pixel.RedBias = save->RedBias;
       ctx->Pixel.GreenScale = save->GreenScale;
@@ -709,7 +681,7 @@ _mesa_meta_end(struct gl_context *ctx)
       ctx->NewState |=_NEW_PIXEL;
    }
 
-   if (state & META_RASTERIZATION) {
+   if (state & MESA_META_RASTERIZATION) {
       _mesa_PolygonMode(GL_FRONT, save->FrontPolygonMode);
       _mesa_PolygonMode(GL_BACK, save->BackPolygonMode);
       _mesa_set_enable(ctx, GL_POLYGON_STIPPLE, save->PolygonStipple);
@@ -718,13 +690,13 @@ _mesa_meta_end(struct gl_context *ctx)
       _mesa_set_enable(ctx, GL_CULL_FACE, save->PolygonCull);
    }
 
-   if (state & META_SCISSOR) {
+   if (state & MESA_META_SCISSOR) {
       _mesa_set_enable(ctx, GL_SCISSOR_TEST, save->Scissor.Enabled);
       _mesa_Scissor(save->Scissor.X, save->Scissor.Y,
                     save->Scissor.Width, save->Scissor.Height);
    }
 
-   if (state & META_SHADER) {
+   if (state & MESA_META_SHADER) {
       if (ctx->Extensions.ARB_vertex_program) {
          _mesa_set_enable(ctx, GL_VERTEX_PROGRAM_ARB,
                           save->VertexProgramEnabled);
@@ -756,7 +728,7 @@ _mesa_meta_end(struct gl_context *ctx)
 				     save->ActiveShader);
    }
 
-   if (state & META_STENCIL_TEST) {
+   if (state & MESA_META_STENCIL_TEST) {
       const struct gl_stencil_attrib *stencil = &save->Stencil;
 
       _mesa_set_enable(ctx, GL_STENCIL_TEST, stencil->Enabled);
@@ -787,7 +759,7 @@ _mesa_meta_end(struct gl_context *ctx)
                               stencil->ZPassFunc[1]);
    }
 
-   if (state & META_TEXTURE) {
+   if (state & MESA_META_TEXTURE) {
       GLuint u, tgt;
 
       ASSERT(ctx->Texture.CurrentUnit == 0);
@@ -838,7 +810,7 @@ _mesa_meta_end(struct gl_context *ctx)
       _mesa_ClientActiveTextureARB(GL_TEXTURE0 + save->ClientActiveUnit);
    }
 
-   if (state & META_TRANSFORM) {
+   if (state & MESA_META_TRANSFORM) {
       GLuint activeTexture = ctx->Texture.CurrentUnit;
       _mesa_ActiveTextureARB(GL_TEXTURE0);
       _mesa_MatrixMode(GL_TEXTURE);
@@ -854,7 +826,7 @@ _mesa_meta_end(struct gl_context *ctx)
       _mesa_MatrixMode(save->MatrixMode);
    }
 
-   if (state & META_CLIP) {
+   if (state & MESA_META_CLIP) {
       if (save->ClipPlanesEnabled) {
          GLuint i;
          for (i = 0; i < ctx->Const.MaxClipPlanes; i++) {
@@ -865,7 +837,7 @@ _mesa_meta_end(struct gl_context *ctx)
       }
    }
 
-   if (state & META_VERTEX) {
+   if (state & MESA_META_VERTEX) {
       /* restore vertex buffer object */
       _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, save->ArrayBufferObj->Name);
       _mesa_reference_buffer_object(ctx, &save->ArrayBufferObj, NULL);
@@ -875,7 +847,7 @@ _mesa_meta_end(struct gl_context *ctx)
       _mesa_reference_array_object(ctx, &save->ArrayObj, NULL);
    }
 
-   if (state & META_VIEWPORT) {
+   if (state & MESA_META_VIEWPORT) {
       if (save->ViewportX != ctx->Viewport.X ||
           save->ViewportY != ctx->Viewport.Y ||
           save->ViewportW != ctx->Viewport.Width ||
@@ -886,15 +858,15 @@ _mesa_meta_end(struct gl_context *ctx)
       _mesa_DepthRange(save->DepthNear, save->DepthFar);
    }
 
-   if (state & META_CLAMP_FRAGMENT_COLOR) {
+   if (state & MESA_META_CLAMP_FRAGMENT_COLOR) {
       _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, save->ClampFragmentColor);
    }
 
-   if (state & META_CLAMP_VERTEX_COLOR) {
+   if (state & MESA_META_CLAMP_VERTEX_COLOR) {
       _mesa_ClampColorARB(GL_CLAMP_VERTEX_COLOR, save->ClampVertexColor);
    }
 
-   if (state & META_CONDITIONAL_RENDER) {
+   if (state & MESA_META_CONDITIONAL_RENDER) {
       if (save->CondRenderQuery)
 	 _mesa_BeginConditionalRender(save->CondRenderQuery->Id,
 				      save->CondRenderMode);
@@ -1360,7 +1332,7 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
    }
 
    /* only scissor effects blit so save/clear all other relevant state */
-   _mesa_meta_begin(ctx, ~META_SCISSOR);
+   _mesa_meta_begin(ctx, ~MESA_META_SCISSOR);
 
    if (blit->ArrayObj == 0) {
       /* one-time setup */
@@ -1489,15 +1461,15 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers)
    };
    struct vertex verts[4];
    /* save all state but scissor, pixel pack/unpack */
-   GLbitfield metaSave = (META_ALL -
-			  META_SCISSOR -
-			  META_PIXEL_STORE -
-			  META_CONDITIONAL_RENDER);
+   GLbitfield metaSave = (MESA_META_ALL -
+			  MESA_META_SCISSOR -
+			  MESA_META_PIXEL_STORE -
+			  MESA_META_CONDITIONAL_RENDER);
    const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1;
 
    if (buffers & BUFFER_BITS_COLOR) {
       /* if clearing color buffers, don't save/restore colormask */
-      metaSave -= META_COLOR_MASK;
+      metaSave -= MESA_META_COLOR_MASK;
    }
 
    _mesa_meta_begin(ctx, metaSave);
@@ -1532,7 +1504,7 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers)
       _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE);
    }
    else {
-      ASSERT(metaSave & META_COLOR_MASK);
+      ASSERT(metaSave & MESA_META_COLOR_MASK);
       _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
    }
 
@@ -1669,22 +1641,22 @@ _mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers)
       GLfloat x, y, z;
    } verts[4];
 
-   metaSave = (META_ALPHA_TEST |
-	       META_BLEND |
-	       META_DEPTH_TEST |
-	       META_RASTERIZATION |
-	       META_SHADER |
-	       META_STENCIL_TEST |
-	       META_VERTEX |
-	       META_VIEWPORT |
-	       META_CLIP |
-	       META_CLAMP_FRAGMENT_COLOR);
+   metaSave = (MESA_META_ALPHA_TEST |
+	       MESA_META_BLEND |
+	       MESA_META_DEPTH_TEST |
+	       MESA_META_RASTERIZATION |
+	       MESA_META_SHADER |
+	       MESA_META_STENCIL_TEST |
+	       MESA_META_VERTEX |
+	       MESA_META_VIEWPORT |
+	       MESA_META_CLIP |
+	       MESA_META_CLAMP_FRAGMENT_COLOR);
 
    if (!(buffers & BUFFER_BITS_COLOR)) {
       /* We'll use colormask to disable color writes.  Otherwise,
        * respect color mask
        */
-      metaSave |= META_COLOR_MASK;
+      metaSave |= MESA_META_COLOR_MASK;
    }
 
    _mesa_meta_begin(ctx, metaSave);
@@ -1706,7 +1678,7 @@ _mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers)
       _mesa_ClampColorARB(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE);
    }
    else {
-      ASSERT(metaSave & META_COLOR_MASK);
+      ASSERT(metaSave & MESA_META_COLOR_MASK);
       _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
    }
 
@@ -1788,13 +1760,13 @@ _mesa_meta_CopyPixels(struct gl_context *ctx, GLint srcX, GLint srcY,
    /* Most GL state applies to glCopyPixels, but a there's a few things
     * we need to override:
     */
-   _mesa_meta_begin(ctx, (META_RASTERIZATION |
-                          META_SHADER |
-                          META_TEXTURE |
-                          META_TRANSFORM |
-                          META_CLIP |
-                          META_VERTEX |
-                          META_VIEWPORT));
+   _mesa_meta_begin(ctx, (MESA_META_RASTERIZATION |
+                          MESA_META_SHADER |
+                          MESA_META_TEXTURE |
+                          MESA_META_TRANSFORM |
+                          MESA_META_CLIP |
+                          MESA_META_VERTEX |
+                          MESA_META_VIEWPORT));
 
    if (copypix->ArrayObj == 0) {
       /* one-time setup */
@@ -2069,10 +2041,10 @@ _mesa_meta_DrawPixels(struct gl_context *ctx,
           * in [0,1].
           */
          texIntFormat = GL_ALPHA;
-         metaExtraSave = (META_COLOR_MASK |
-                          META_DEPTH_TEST |
-                          META_SHADER |
-                          META_STENCIL_TEST);
+         metaExtraSave = (MESA_META_COLOR_MASK |
+                          MESA_META_DEPTH_TEST |
+                          MESA_META_SHADER |
+                          MESA_META_STENCIL_TEST);
       }
       else {
          fallback = GL_TRUE;
@@ -2082,7 +2054,7 @@ _mesa_meta_DrawPixels(struct gl_context *ctx,
       if (ctx->Extensions.ARB_depth_texture &&
           ctx->Extensions.ARB_fragment_program) {
          texIntFormat = GL_DEPTH_COMPONENT;
-         metaExtraSave = (META_SHADER);
+         metaExtraSave = (MESA_META_SHADER);
       }
       else {
          fallback = GL_TRUE;
@@ -2110,14 +2082,14 @@ _mesa_meta_DrawPixels(struct gl_context *ctx,
    /* Most GL state applies to glDrawPixels (like blending, stencil, etc),
     * but a there's a few things we need to override:
     */
-   _mesa_meta_begin(ctx, (META_RASTERIZATION |
-                          META_SHADER |
-                          META_TEXTURE |
-                          META_TRANSFORM |
-                          META_CLIP |
-                          META_VERTEX |
-                          META_VIEWPORT |
-			  META_CLAMP_FRAGMENT_COLOR |
+   _mesa_meta_begin(ctx, (MESA_META_RASTERIZATION |
+                          MESA_META_SHADER |
+                          MESA_META_TEXTURE |
+                          MESA_META_TRANSFORM |
+                          MESA_META_CLIP |
+                          MESA_META_VERTEX |
+                          MESA_META_VIEWPORT |
+			  MESA_META_CLAMP_FRAGMENT_COLOR |
                           metaExtraSave));
 
    newTex = alloc_texture(tex, width, height, texIntFormat);
@@ -2318,15 +2290,15 @@ _mesa_meta_Bitmap(struct gl_context *ctx,
    /* Most GL state applies to glBitmap (like blending, stencil, etc),
     * but a there's a few things we need to override:
     */
-   _mesa_meta_begin(ctx, (META_ALPHA_TEST |
-                          META_PIXEL_STORE |
-                          META_RASTERIZATION |
-                          META_SHADER |
-                          META_TEXTURE |
-                          META_TRANSFORM |
-                          META_CLIP |
-                          META_VERTEX |
-                          META_VIEWPORT));
+   _mesa_meta_begin(ctx, (MESA_META_ALPHA_TEST |
+                          MESA_META_PIXEL_STORE |
+                          MESA_META_RASTERIZATION |
+                          MESA_META_SHADER |
+                          MESA_META_TEXTURE |
+                          MESA_META_TRANSFORM |
+                          MESA_META_CLIP |
+                          MESA_META_VERTEX |
+                          MESA_META_VIEWPORT));
 
    if (bitmap->ArrayObj == 0) {
       /* one-time setup */
@@ -2544,7 +2516,7 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
       faceTarget = target;
    }
 
-   _mesa_meta_begin(ctx, META_ALL);
+   _mesa_meta_begin(ctx, MESA_META_ALL);
 
    if (original_active_unit != 0)
       _mesa_BindTexture(target, texObj->Name);
@@ -2900,7 +2872,7 @@ copy_tex_sub_image(struct gl_context *ctx,
    /*
     * Read image from framebuffer (disable pixel transfer ops)
     */
-   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+   _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE | MESA_META_PIXEL_TRANSFER);
    ctx->Driver.ReadPixels(ctx, x, y, width, height,
 			  format, type, &ctx->Pack, buf);
    _mesa_meta_end(ctx);
@@ -2910,7 +2882,7 @@ copy_tex_sub_image(struct gl_context *ctx,
    /*
     * Store texture data (with pixel transfer ops)
     */
-   _mesa_meta_begin(ctx, META_PIXEL_STORE);
+   _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE);
    if (target == GL_TEXTURE_1D) {
       ctx->Driver.TexSubImage1D(ctx, target, level, xoffset,
                                 width, format, type, buf,
@@ -2982,7 +2954,7 @@ _mesa_meta_CopyColorTable(struct gl_context *ctx,
    /*
     * Read image from framebuffer (disable pixel transfer ops)
     */
-   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+   _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE | MESA_META_PIXEL_TRANSFER);
    ctx->Driver.ReadPixels(ctx, x, y, width, 1,
                           GL_RGBA, GL_FLOAT, &ctx->Pack, buf);
 
@@ -3009,7 +2981,7 @@ _mesa_meta_CopyColorSubTable(struct gl_context *ctx,GLenum target, GLsizei start
    /*
     * Read image from framebuffer (disable pixel transfer ops)
     */
-   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+   _mesa_meta_begin(ctx, MESA_META_PIXEL_STORE | MESA_META_PIXEL_TRANSFER);
    ctx->Driver.ReadPixels(ctx, x, y, width, 1,
                           GL_RGBA, GL_FLOAT, &ctx->Pack, buf);
 
diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
index 95b4b5579c8..ac20e370eb8 100644
--- a/src/mesa/drivers/common/meta.h
+++ b/src/mesa/drivers/common/meta.h
@@ -26,6 +26,33 @@
 #ifndef META_H
 #define META_H
 
+/**
+ * \name Flags for meta operations
+ * \{
+ *
+ * These flags are passed to _mesa_meta_begin().
+ */
+#define MESA_META_ALL                      ~0x0
+#define MESA_META_ALPHA_TEST                0x1
+#define MESA_META_BLEND                     0x2  /**< includes logicop */
+#define MESA_META_COLOR_MASK                0x4
+#define MESA_META_DEPTH_TEST                0x8
+#define MESA_META_FOG                      0x10
+#define MESA_META_PIXEL_STORE              0x20
+#define MESA_META_PIXEL_TRANSFER           0x40
+#define MESA_META_RASTERIZATION            0x80
+#define MESA_META_SCISSOR                 0x100
+#define MESA_META_SHADER                  0x200
+#define MESA_META_STENCIL_TEST            0x400
+#define MESA_META_TRANSFORM               0x800 /**< modelview/projection matrix state */
+#define MESA_META_TEXTURE                0x1000
+#define MESA_META_VERTEX                 0x2000
+#define MESA_META_VIEWPORT               0x4000
+#define MESA_META_CLAMP_FRAGMENT_COLOR   0x8000
+#define MESA_META_CLAMP_VERTEX_COLOR    0x10000
+#define MESA_META_CONDITIONAL_RENDER    0x20000
+#define MESA_META_CLIP                  0x40000
+/**\}*/
 
 extern void
 _mesa_meta_init(struct gl_context *ctx);
@@ -33,6 +60,12 @@ _mesa_meta_init(struct gl_context *ctx);
 extern void
 _mesa_meta_free(struct gl_context *ctx);
 
+extern void
+_mesa_meta_begin(struct gl_context *ctx, GLbitfield state);
+
+extern void
+_mesa_meta_end(struct gl_context *ctx);
+
 extern void
 _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
                            GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,

From 352cab498a6210e1fdd57d6be2a30d33024ef37f Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 18 Aug 2011 14:35:27 -0600
Subject: [PATCH 441/600] mesa: restructure error checking in
 _mesa_FramebufferTexture1D/2DEXT()

In anticipation of adding more texture targets.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/main/fbobject.c | 56 ++++++++++++++++++++++++++++++++--------
 1 file changed, 45 insertions(+), 11 deletions(-)

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 7646f9bdb03..e25ec8cc2b7 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -1984,10 +1984,23 @@ _mesa_FramebufferTexture1DEXT(GLenum target, GLenum attachment,
 {
    GET_CURRENT_CONTEXT(ctx);
 
-   if ((texture != 0) && (textarget != GL_TEXTURE_1D)) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glFramebufferTexture1DEXT(textarget)");
-      return;
+   if (texture != 0) {
+      GLboolean error;
+
+      switch (textarget) {
+      case GL_TEXTURE_1D:
+         error = GL_FALSE;
+         break;
+      default:
+         error = GL_TRUE;
+      }
+
+      if (error) {
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "glFramebufferTexture1DEXT(textarget=%s)",
+                     _mesa_lookup_enum_by_nr(textarget));
+         return;
+      }
    }
 
    framebuffer_texture(ctx, "1D", target, attachment, textarget, texture,
@@ -2001,13 +2014,34 @@ _mesa_FramebufferTexture2DEXT(GLenum target, GLenum attachment,
 {
    GET_CURRENT_CONTEXT(ctx);
 
-   if ((texture != 0) &&
-       (textarget != GL_TEXTURE_2D) &&
-       (textarget != GL_TEXTURE_RECTANGLE_ARB) &&
-       (!is_cube_face(textarget))) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glFramebufferTexture2DEXT(textarget=0x%x)", textarget);
-      return;
+   if (texture != 0) {
+      GLboolean error;
+
+      switch (textarget) {
+      case GL_TEXTURE_2D:
+         error = GL_FALSE;
+         break;
+      case GL_TEXTURE_RECTANGLE:
+         error = !ctx->Extensions.NV_texture_rectangle;
+         break;
+      case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+      case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+      case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+      case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+      case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+      case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+         error = !ctx->Extensions.ARB_texture_cube_map;
+         break;
+      default:
+         error = GL_FALSE;
+      }
+
+      if (error) {
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "glFramebufferTexture2DEXT(textarget=%s)",
+                     _mesa_lookup_enum_by_nr(textarget));
+         return;
+      }
    }
 
    framebuffer_texture(ctx, "2D", target, attachment, textarget, texture,

From 0eb18ee55719377ebd90456bde605384ce4ec14a Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 18 Aug 2011 15:54:53 -0600
Subject: [PATCH 442/600] mesa: set Q=1 for OPCODE_TEX execution

Q should not be significant for OPCODE_TEX, but it winds up getting
passed to the compute_lambda() function.  Make sure it's 1.0 to
prevent garbage values, which is effectively what we get when the
swizzle is coord.xyzz (which is what GLSL gives us).

Part of the fix for piglit's fbo-generatemipmap-array test.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/program/prog_execute.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c
index c70a1e344e5..77f842a1630 100644
--- a/src/mesa/program/prog_execute.c
+++ b/src/mesa/program/prog_execute.c
@@ -1651,6 +1651,14 @@ _mesa_execute_program(struct gl_context * ctx,
             GLfloat texcoord[4], color[4];
             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
 
+            /* For TEX, texcoord.Q should not be used and its value should not
+             * matter (at most, we pass coord.xyz to texture3D() in GLSL).
+             * Set Q=1 so that FetchTexelDeriv() doesn't get a garbage value
+             * which is effectively what happens when the texcoord swizzle
+             * is .xyzz
+             */
+            texcoord[3] = 1.0f;
+
             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
 
             if (DEBUG_PROG) {

From 0f8c43c34f74b2ebc40ade2944f3b56b7dc606b0 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 18 Aug 2011 15:58:00 -0600
Subject: [PATCH 443/600] meta: use fallback mipmap generation for 1D/2D
 texture arrays

We could do 1D/2D arrays with textured quad rendering, but it'll take
some work (as with 3D textures).

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/drivers/common/meta.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index e37b78aae78..291d912121b 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -2424,7 +2424,9 @@ _mesa_meta_check_generate_mipmap_fallback(struct gl_context *ctx, GLenum target,
 
    /* check for fallbacks */
    if (!ctx->Extensions.EXT_framebuffer_object ||
-       target == GL_TEXTURE_3D) {
+       target == GL_TEXTURE_3D ||
+       target == GL_TEXTURE_1D_ARRAY ||
+       target == GL_TEXTURE_2D_ARRAY) {
       return GL_TRUE;
    }
 
@@ -2476,7 +2478,8 @@ _mesa_meta_check_generate_mipmap_fallback(struct gl_context *ctx, GLenum target,
 
 /**
  * Called via ctx->Driver.GenerateMipmap()
- * Note: texture borders and 3D texture support not yet complete.
+ * Note: We don't yet support 3D textures, 1D/2D array textures or texture
+ * borders.
  */
 void
 _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,

From 3e9dc51f82276e57ecfb4e2725d88d83dbedcd85 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 18 Aug 2011 15:59:33 -0600
Subject: [PATCH 444/600] mesa: handle array textures in GenerateMipmap(),
 FramebufferTexture1/2D()

This was an unfinished to-do item before.
With this patch and the two preceeding patches, piglit's
fbo-generatemipmap-array test runs and passes instead of generating
a GL error and dying on an assertion.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/main/fbobject.c | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index e25ec8cc2b7..0b48fc7eab0 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -1991,6 +1991,9 @@ _mesa_FramebufferTexture1DEXT(GLenum target, GLenum attachment,
       case GL_TEXTURE_1D:
          error = GL_FALSE;
          break;
+      case GL_TEXTURE_1D_ARRAY:
+         error = !ctx->Extensions.EXT_texture_array;
+         break;
       default:
          error = GL_TRUE;
       }
@@ -2032,6 +2035,9 @@ _mesa_FramebufferTexture2DEXT(GLenum target, GLenum attachment,
       case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
          error = !ctx->Extensions.ARB_texture_cube_map;
          break;
+      case GL_TEXTURE_2D_ARRAY:
+         error = !ctx->Extensions.EXT_texture_array;
+         break;
       default:
          error = GL_FALSE;
       }
@@ -2380,6 +2386,8 @@ void GLAPIENTRY
 _mesa_GenerateMipmapEXT(GLenum target)
 {
    struct gl_texture_object *texObj;
+   GLboolean error;
+
    GET_CURRENT_CONTEXT(ctx);
 
    ASSERT_OUTSIDE_BEGIN_END(ctx);
@@ -2389,12 +2397,22 @@ _mesa_GenerateMipmapEXT(GLenum target)
    case GL_TEXTURE_1D:
    case GL_TEXTURE_2D:
    case GL_TEXTURE_3D:
+      error = GL_FALSE;
+      break;
    case GL_TEXTURE_CUBE_MAP:
-      /* OK, legal value */
+      error = !ctx->Extensions.ARB_texture_cube_map;
+      break;
+   case GL_TEXTURE_1D_ARRAY:
+   case GL_TEXTURE_2D_ARRAY:
+      error = !ctx->Extensions.EXT_texture_array;
       break;
    default:
-      /* XXX need to implement GL_TEXTURE_1D_ARRAY and GL_TEXTURE_2D_ARRAY */
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGenerateMipmapEXT(target)");
+      error = GL_TRUE;
+   }
+
+   if (error) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glGenerateMipmapEXT(target=%s)",
+                  _mesa_lookup_enum_by_nr(target));
       return;
    }
 

From 68c54abb2cfd12a031829e78d721b2480d0c8cc4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Fri, 19 Aug 2011 01:07:46 +0200
Subject: [PATCH 445/600] r600g: fix depth-stencil on evergreen

Such that it actually works in apps which use both.

A separate buffer is allocated for stencil. The only exception is
the window-system-provided depth-stencil buffer, where depth and stencil
share the same buffer.

This fixes:
- fbo-depthstencil-GL_DEPTH24_STENCIL8-clear
- fbo-depthstencil-GL_DEPTH24_STENCIL8-drawpixels-FLOAT-and-USHORT
- fbo-depthstencil-GL_DEPTH24_STENCIL8-readpixels-24_8
- fbo-depthstencil-GL_DEPTH24_STENCIL8-readpixels-FLOAT-and-USHORT
---
 src/gallium/drivers/r600/evergreen_state.c | 65 +++++++----------
 src/gallium/drivers/r600/r600_blit.c       |  7 +-
 src/gallium/drivers/r600/r600_resource.h   |  7 ++
 src/gallium/drivers/r600/r600_texture.c    | 83 +++++++++++++++++-----
 4 files changed, 103 insertions(+), 59 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index f82e20306d1..f2b3b8304bb 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -262,7 +262,6 @@ static uint32_t r600_translate_dbformat(enum pipe_format format)
 	case PIPE_FORMAT_Z16_UNORM:
 		return V_028040_Z_16;
 	case PIPE_FORMAT_Z24X8_UNORM:
-		return V_028040_Z_24;
 	case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
 		return V_028040_Z_24;
 	default:
@@ -270,14 +269,6 @@ static uint32_t r600_translate_dbformat(enum pipe_format format)
 	}
 }
 
-static uint32_t r600_translate_stencilformat(enum pipe_format format)
-{
-	if (format == PIPE_FORMAT_Z24_UNORM_S8_USCALED)
-		return 1;
-	else
-		return 0;
-}
-
 static uint32_t r600_translate_colorswap(enum pipe_format format)
 {
 	switch (format) {
@@ -1381,55 +1372,51 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
 }
 
 static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate,
-			const struct pipe_framebuffer_state *state)
+			 const struct pipe_framebuffer_state *state)
 {
 	struct r600_resource_texture *rtex;
-	struct r600_resource *rbuffer;
 	struct r600_surface *surf;
-	unsigned level;
-	unsigned pitch, slice, format, stencil_format;
+	unsigned level, first_layer;
+	unsigned pitch, slice, format;
 	unsigned offset;
 
 	if (state->zsbuf == NULL)
 		return;
 
-	level = state->zsbuf->u.tex.level;
-
 	surf = (struct r600_surface *)state->zsbuf;
-	rtex = (struct r600_resource_texture*)state->zsbuf->texture;
+	rtex = (struct r600_resource_texture*)surf->base.texture;
 
-	rbuffer = &rtex->resource;
-
-	/* XXX quite sure for dx10+ hw don't need any offset hacks */
-	offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture,
-					 level, state->zsbuf->u.tex.first_layer);
+	level = surf->base.u.tex.level;
+	first_layer = surf->base.u.tex.first_layer;
+	offset = r600_texture_get_offset(rtex, level, first_layer);
 	pitch = rtex->pitch_in_blocks[level] / 8 - 1;
 	slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
-	format = r600_translate_dbformat(state->zsbuf->texture->format);
-	stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format);
+	format = r600_translate_dbformat(rtex->real_format);
 
 	r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE,
-				offset >> 8, 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE);
+				offset >> 8, 0xFFFFFFFF, rtex->resource.bo, RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE,
-				offset >> 8, 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE);
-
-	if (stencil_format) {
-		uint32_t stencil_offset;
-
-		stencil_offset = ((surf->aligned_height * rtex->pitch_in_bytes[level]) + 255) & ~255;
-		r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE,
-					(offset + stencil_offset) >> 8, 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE);
-		r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE,
-					(offset + stencil_offset) >> 8, 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE);
-	}
-
+				offset >> 8, 0xFFFFFFFF, rtex->resource.bo, RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL, 0);
-	r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO,
-				S_028044_FORMAT(stencil_format), 0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE);
+
+	if (rtex->stencil) {
+		uint32_t stencil_offset =
+			r600_texture_get_offset(rtex->stencil, level, first_layer);
+
+		r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE,
+					stencil_offset >> 8, 0xFFFFFFFF, rtex->stencil->resource.bo, RADEON_USAGE_READWRITE);
+		r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE,
+					stencil_offset >> 8, 0xFFFFFFFF, rtex->stencil->resource.bo, RADEON_USAGE_READWRITE);
+		r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO,
+					1, 0xFFFFFFFF, rtex->stencil->resource.bo, RADEON_USAGE_READWRITE);
+	} else {
+		r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO,
+					0, 0xFFFFFFFF, NULL, RADEON_USAGE_READWRITE);
+	}
 
 	r600_pipe_state_add_reg(rstate, R_028040_DB_Z_INFO,
 				S_028040_ARRAY_MODE(rtex->array_mode[level]) | S_028040_FORMAT(format),
-				0xFFFFFFFF, rbuffer->bo, RADEON_USAGE_READWRITE);
+				0xFFFFFFFF, rtex->resource.bo, RADEON_USAGE_READWRITE);
 	r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE,
 				S_028058_PITCH_TILE_MAX(pitch),
 				0xFFFFFFFF, NULL, 0);
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index e1cf585234e..2f7e871448a 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -111,7 +111,7 @@ void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_t
 	if (!texture->dirty_db)
 		return;
 
-	surf_tmpl.format = texture->resource.b.b.b.format;
+	surf_tmpl.format = texture->real_format;
 	surf_tmpl.u.tex.level = level;
 	surf_tmpl.u.tex.first_layer = 0;
 	surf_tmpl.u.tex.last_layer = 0;
@@ -119,7 +119,7 @@ void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_t
 
 	zsurf = ctx->create_surface(ctx, &texture->resource.b.b.b, &surf_tmpl);
 
-	surf_tmpl.format = ((struct pipe_resource*)texture->flushed_depth_texture)->format;
+	surf_tmpl.format = texture->flushed_depth_texture->real_format;
 	surf_tmpl.usage = PIPE_BIND_RENDER_TARGET;
 	cbsurf = ctx->create_surface(ctx,
 			(struct pipe_resource*)texture->flushed_depth_texture, &surf_tmpl);
@@ -249,7 +249,7 @@ static void r600_compressed_to_blittable(struct pipe_resource *tex,
 				   struct texture_orig_info *orig)
 {
 	struct r600_resource_texture *rtex = (struct r600_resource_texture*)tex;
-	unsigned pixsize = util_format_get_blocksize(tex->format);
+	unsigned pixsize = util_format_get_blocksize(rtex->real_format);
 	int new_format;
 	int new_height, new_width;
 
@@ -269,7 +269,6 @@ static void r600_compressed_to_blittable(struct pipe_resource *tex,
 	tex->width0 = new_width;
 	tex->height0 = new_height;
 	tex->format = new_format;
-
 }
 
 static void r600_reset_blittable_to_compressed(struct pipe_resource *tex,
diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h
index 836e7491f1f..d9d29db7968 100644
--- a/src/gallium/drivers/r600/r600_resource.h
+++ b/src/gallium/drivers/r600/r600_resource.h
@@ -52,6 +52,12 @@ struct r600_resource {
 
 struct r600_resource_texture {
 	struct r600_resource		resource;
+
+	/* If this resource is a depth-stencil buffer on evergreen, this contains
+	 * the depth part of the format. There is a separate stencil resource
+	 * for the stencil buffer below. */
+	enum pipe_format		real_format;
+
 	unsigned			offset[PIPE_MAX_TEXTURE_LEVELS];
 	unsigned			pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS];  /* transfer */
 	unsigned			pitch_in_blocks[PIPE_MAX_TEXTURE_LEVELS]; /* texture resource */
@@ -62,6 +68,7 @@ struct r600_resource_texture {
 	unsigned			tile_type;
 	unsigned			depth;
 	unsigned			dirty_db;
+	struct r600_resource_texture    *stencil; /* Stencil is in a separate buffer on Evergreen. */
 	struct r600_resource_texture	*flushed_depth_texture;
 	boolean				is_flushing_texture;
 
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 7b5a3e74a26..1c6f39adee8 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -173,15 +173,15 @@ static unsigned r600_texture_get_nblocksx(struct pipe_screen *screen,
 {
 	struct pipe_resource *ptex = &rtex->resource.b.b.b;
 	unsigned nblocksx, block_align, width;
-	unsigned blocksize = util_format_get_blocksize(ptex->format);
+	unsigned blocksize = util_format_get_blocksize(rtex->real_format);
 
 	if (rtex->pitch_override)
 		return rtex->pitch_override / blocksize;
 
 	width = mip_minify(ptex->width0, level);
-	nblocksx = util_format_get_nblocksx(ptex->format, width);
+	nblocksx = util_format_get_nblocksx(rtex->real_format, width);
 
-	block_align = r600_get_block_alignment(screen, ptex->format,
+	block_align = r600_get_block_alignment(screen, rtex->real_format,
 					      rtex->array_mode[level]);
 	nblocksx = align(nblocksx, block_align);
 	return nblocksx;
@@ -195,7 +195,7 @@ static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen,
 	unsigned height, tile_height;
 
 	height = mip_minify(ptex->height0, level);
-	height = util_format_get_nblocksy(ptex->format, height);
+	height = util_format_get_nblocksy(rtex->real_format, height);
 	tile_height = r600_get_height_alignment(screen,
 						rtex->array_mode[level]);
 	height = align(height, tile_height);
@@ -220,7 +220,7 @@ static void r600_texture_set_array_mode(struct pipe_screen *screen,
 		unsigned w, h, tile_height, tile_width;
 
 		tile_height = r600_get_height_alignment(screen, array_mode);
-		tile_width = r600_get_block_alignment(screen, ptex->format, array_mode);
+		tile_width = r600_get_block_alignment(screen, rtex->real_format, array_mode);
 
 		w = mip_minify(ptex->width0, level);
 		h = mip_minify(ptex->height0, level);
@@ -241,11 +241,11 @@ static void r600_setup_miptree(struct pipe_screen *screen,
 	struct radeon *radeon = ((struct r600_screen*)screen)->radeon;
 	enum chip_class chipc = r600_get_family_class(radeon);
 	unsigned size, layer_size, i, offset;
-	unsigned nblocksx, nblocksy, extra_size = 0;
+	unsigned nblocksx, nblocksy;
 
 	for (i = 0, offset = 0; i <= ptex->last_level; i++) {
-		unsigned blocksize = util_format_get_blocksize(ptex->format);
-		unsigned base_align = r600_get_base_alignment(screen, ptex->format, array_mode);
+		unsigned blocksize = util_format_get_blocksize(rtex->real_format);
+		unsigned base_align = r600_get_base_alignment(screen, rtex->real_format, array_mode);
 
 		r600_texture_set_array_mode(screen, rtex, i, array_mode);
 
@@ -264,10 +264,6 @@ static void r600_setup_miptree(struct pipe_screen *screen,
 		else
 			size = layer_size * ptex->array_size;
 
-		/* evergreen stores depth and stencil separately */
-		if ((chipc >= EVERGREEN) && util_format_is_depth_or_stencil(ptex->format))
-			extra_size = align(extra_size + (nblocksx * nblocksy * 1), base_align);
-
 		/* align base image and start of miptree */
 		if ((i == 0) || (i == 1))
 			offset = align(offset, base_align);
@@ -278,7 +274,7 @@ static void r600_setup_miptree(struct pipe_screen *screen,
 
 		offset += size;
 	}
-	rtex->size = offset + extra_size;
+	rtex->size = offset;
 }
 
 /* Figure out whether u_blitter will fallback to a transfer operation.
@@ -384,20 +380,76 @@ r600_texture_create_object(struct pipe_screen *screen,
 	resource->b.b.b.screen = screen;
 	resource->bo = bo;
 	rtex->pitch_override = pitch_in_bytes_override;
+	rtex->real_format = base->format;
+
+	/* We must split depth and stencil into two separate buffers on Evergreen. */
+	if (r600_get_family_class(((struct r600_screen*)screen)->radeon) >= EVERGREEN &&
+	    util_format_is_depth_and_stencil(base->format)) {
+		struct pipe_resource stencil;
+		unsigned stencil_pitch_override = 0;
+
+		switch (base->format) {
+		case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+			rtex->real_format = PIPE_FORMAT_Z24X8_UNORM;
+			break;
+		case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+			rtex->real_format = PIPE_FORMAT_X8Z24_UNORM;
+			break;
+		case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED:
+			rtex->real_format = PIPE_FORMAT_Z32_FLOAT;
+			break;
+		default:
+			assert(0);
+			FREE(rtex);
+			return NULL;
+		}
+
+		/* Divide the pitch in bytes by 4 for stencil, because it has a smaller pixel size. */
+		if (pitch_in_bytes_override) {
+			assert(base->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED ||
+			       base->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM);
+			stencil_pitch_override = pitch_in_bytes_override / 4;
+		}
+
+		/* Allocate the stencil buffer. */
+		stencil = *base;
+		stencil.format = PIPE_FORMAT_S8_USCALED;
+		rtex->stencil = r600_texture_create_object(screen, &stencil, array_mode,
+							   stencil_pitch_override, max_buffer_size, bo);
+		if (!rtex->stencil) {
+			FREE(rtex);
+			return NULL;
+		}
+		/* Proceed in creating the depth buffer. */
+	}
+
 	/* only mark depth textures the HW can hit as depth textures */
-	if (util_format_is_depth_or_stencil(base->format) && permit_hardware_blit(screen, base))
+	if (util_format_is_depth_or_stencil(rtex->real_format) && permit_hardware_blit(screen, base))
 		rtex->depth = 1;
 
 	r600_setup_miptree(screen, rtex, array_mode);
 
 	resource->size = rtex->size;
 
+	/* If bo is not NULL, in which case depth and stencil must share the same buffer,
+	 * and we initialized separate stencil for Evergreen. place it after depth. */
+	if (bo && rtex->stencil) {
+		unsigned stencil_align, stencil_offset;
+
+		stencil_align = r600_get_base_alignment(screen, rtex->stencil->real_format, array_mode);
+		stencil_offset = align(rtex->size, stencil_align);
+
+		for (unsigned i = 0; i <= rtex->stencil->resource.b.b.b.last_level; i++)
+			rtex->stencil->offset[i] += stencil_offset;
+	}
+
 	if (!resource->bo) {
 		struct pipe_resource *ptex = &rtex->resource.b.b.b;
-		int base_align = r600_get_base_alignment(screen, ptex->format, array_mode);
+		unsigned base_align = r600_get_base_alignment(screen, ptex->format, array_mode);
 
 		resource->bo = r600_bo(radeon, rtex->size, base_align, base->bind, base->usage);
 		if (!resource->bo) {
+			pipe_resource_reference((struct pipe_resource**)&rtex->stencil, NULL);
 			FREE(rtex);
 			return NULL;
 		}
@@ -436,7 +488,6 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
 
 	return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode,
 								  0, 0, NULL);
-
 }
 
 static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,

From 7f29824fd5df27eca516ad65e4a4f8ff94fe7bed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Fri, 19 Aug 2011 19:21:40 +0200
Subject: [PATCH 446/600] r600g: put depth and stencil into one backing buffer

For DRI2 sharing.
---
 src/gallium/drivers/r600/r600_texture.c | 27 ++++++++++++++++---------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 1c6f39adee8..f0cf1f593d8 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -363,7 +363,8 @@ r600_texture_create_object(struct pipe_screen *screen,
 			   unsigned array_mode,
 			   unsigned pitch_in_bytes_override,
 			   unsigned max_buffer_size,
-			   struct r600_bo *bo)
+			   struct r600_bo *bo,
+			   boolean alloc_bo)
 {
 	struct r600_resource_texture *rtex;
 	struct r600_resource *resource;
@@ -415,7 +416,8 @@ r600_texture_create_object(struct pipe_screen *screen,
 		stencil = *base;
 		stencil.format = PIPE_FORMAT_S8_USCALED;
 		rtex->stencil = r600_texture_create_object(screen, &stencil, array_mode,
-							   stencil_pitch_override, max_buffer_size, bo);
+							   stencil_pitch_override,
+							   max_buffer_size, NULL, FALSE);
 		if (!rtex->stencil) {
 			FREE(rtex);
 			return NULL;
@@ -429,11 +431,8 @@ r600_texture_create_object(struct pipe_screen *screen,
 
 	r600_setup_miptree(screen, rtex, array_mode);
 
-	resource->size = rtex->size;
-
-	/* If bo is not NULL, in which case depth and stencil must share the same buffer,
-	 * and we initialized separate stencil for Evergreen. place it after depth. */
-	if (bo && rtex->stencil) {
+	/* If we initialized separate stencil for Evergreen. place it after depth. */
+	if (rtex->stencil) {
 		unsigned stencil_align, stencil_offset;
 
 		stencil_align = r600_get_base_alignment(screen, rtex->stencil->real_format, array_mode);
@@ -441,9 +440,14 @@ r600_texture_create_object(struct pipe_screen *screen,
 
 		for (unsigned i = 0; i <= rtex->stencil->resource.b.b.b.last_level; i++)
 			rtex->stencil->offset[i] += stencil_offset;
+
+		rtex->size = stencil_offset + rtex->stencil->size;
 	}
 
-	if (!resource->bo) {
+	resource->size = rtex->size;
+
+	/* Now create the backing buffer. */
+	if (!resource->bo && alloc_bo) {
 		struct pipe_resource *ptex = &rtex->resource.b.b.b;
 		unsigned base_align = r600_get_base_alignment(screen, ptex->format, array_mode);
 
@@ -454,6 +458,9 @@ r600_texture_create_object(struct pipe_screen *screen,
 			return NULL;
 		}
 	}
+
+	if (rtex->stencil)
+		rtex->stencil->resource.bo = rtex->resource.bo;
 	return rtex;
 }
 
@@ -487,7 +494,7 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
 		array_mode = V_038000_ARRAY_1D_TILED_THIN1;
 
 	return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode,
-								  0, 0, NULL);
+								  0, 0, NULL, TRUE);
 }
 
 static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
@@ -548,7 +555,7 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
 	}
 
 	return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode,
-								  stride, 0, bo);
+								  stride, 0, bo, FALSE);
 }
 
 int r600_texture_depth_flush(struct pipe_context *ctx,

From 98a87a594b6983d2a05d9412e3fa074894c334ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Fri, 19 Aug 2011 19:26:08 +0200
Subject: [PATCH 447/600] r600g: simplify the conditionals determining array
 mode

---
 src/gallium/drivers/r600/r600_texture.c | 31 +++++++++----------------
 1 file changed, 11 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index f0cf1f593d8..74219e8005f 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -464,35 +464,26 @@ r600_texture_create_object(struct pipe_screen *screen,
 	return rtex;
 }
 
+DEBUG_GET_ONCE_BOOL_OPTION(tiling_enabled, "R600_TILING", FALSE);
+
 struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
 						const struct pipe_resource *templ)
 {
+	struct radeon *radeon = ((struct r600_screen*)screen)->radeon;
 	unsigned array_mode = 0;
-	static int force_tiling = -1;
 
-	/* Would like some magic "get_bool_option_once" routine.
-	 */
-	if (force_tiling == -1) {
-#if 0
-		/* reenable when 2D tiling is fixed better */
-		struct r600_screen *rscreen = (struct r600_screen *)screen;
-		if (r600_get_minor_version(rscreen->radeon) >= 9)
-			force_tiling = debug_get_bool_option("R600_TILING", TRUE);
-#endif
-		force_tiling = debug_get_bool_option("R600_TILING", FALSE);
-	}
-
-	if (force_tiling && permit_hardware_blit(screen, templ)) {
-		if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) &&
-		    !(templ->bind & PIPE_BIND_SCANOUT)) {
+	if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) &&
+	    !(templ->bind & PIPE_BIND_SCANOUT)) {
+		if (util_format_is_compressed(templ->format)) {
+			array_mode = V_038000_ARRAY_1D_TILED_THIN1;
+		}
+		else if (debug_get_option_tiling_enabled() &&
+			 r600_get_minor_version(radeon) >= 9 &&
+			 permit_hardware_blit(screen, templ)) {
 			array_mode = V_038000_ARRAY_2D_TILED_THIN1;
 		}
 	}
 
-	if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) &&
-	    util_format_is_compressed(templ->format))
-		array_mode = V_038000_ARRAY_1D_TILED_THIN1;
-
 	return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode,
 								  0, 0, NULL, TRUE);
 }

From 751a6ed893d393eaea266d892402d132f7d15a7d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Fri, 19 Aug 2011 21:48:12 +0200
Subject: [PATCH 448/600] r600g: hack around a problem with texture alignment

---
 src/gallium/drivers/r600/r600_texture.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 74219e8005f..5681dd88e50 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -198,6 +198,16 @@ static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen,
 	height = util_format_get_nblocksy(rtex->real_format, height);
 	tile_height = r600_get_height_alignment(screen,
 						rtex->array_mode[level]);
+
+	/* XXX Hack around an alignment issue. Less tests fail with this.
+	 *
+	 * The thing is depth-stencil buffers should be tiled, i.e.
+	 * the alignment should be >=8. If I make them tiled, stencil starts
+	 * working because it no longer overlaps with the depth buffer
+	 * in memory, but texturing like drawpix-stencil breaks. */
+	if (util_format_is_depth_or_stencil(rtex->real_format) && tile_height < 8)
+		tile_height = 8;
+
 	height = align(height, tile_height);
 	return height;
 }

From 565f39bdb2943bdb94ac3bdf67793c942ff45016 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Fri, 19 Aug 2011 22:27:00 +0200
Subject: [PATCH 449/600] r600g: rename resource -> view in create_sampler_view

The sampler view is not a resource.
Also remove the unused desc variable.
---
 src/gallium/drivers/r600/evergreen_state.c | 28 +++++++++-----------
 src/gallium/drivers/r600/r600_state.c      | 30 ++++++++++------------
 2 files changed, 26 insertions(+), 32 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index f2b3b8304bb..3c17bbad7cd 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -948,43 +948,39 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
 							struct pipe_resource *texture,
 							const struct pipe_sampler_view *state)
 {
-	struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view);
+	struct r600_pipe_sampler_view *view = CALLOC_STRUCT(r600_pipe_sampler_view);
 	struct r600_pipe_resource_state *rstate;
-	const struct util_format_description *desc;
-	struct r600_resource_texture *tmp;
+	struct r600_resource_texture *tmp = (struct r600_resource_texture*)texture;
 	struct r600_resource *rbuffer;
 	unsigned format, endian;
 	uint32_t word4 = 0, yuv_format = 0, pitch = 0;
 	unsigned char swizzle[4], array_mode = 0, tile_type = 0;
 	struct r600_bo *bo[2];
 
-	if (resource == NULL)
+	if (view == NULL)
 		return NULL;
-	rstate = &resource->state;
+	rstate = &view->state;
 
 	/* initialize base object */
-	resource->base = *state;
-	resource->base.texture = NULL;
+	view->base = *state;
+	view->base.texture = NULL;
 	pipe_reference(NULL, &texture->reference);
-	resource->base.texture = texture;
-	resource->base.reference.count = 1;
-	resource->base.context = ctx;
+	view->base.texture = texture;
+	view->base.reference.count = 1;
+	view->base.context = ctx;
 
 	swizzle[0] = state->swizzle_r;
 	swizzle[1] = state->swizzle_g;
 	swizzle[2] = state->swizzle_b;
 	swizzle[3] = state->swizzle_a;
+
 	format = r600_translate_texformat(ctx->screen, state->format,
 					  swizzle,
 					  &word4, &yuv_format);
 	if (format == ~0) {
 		format = 0;
 	}
-	desc = util_format_description(state->format);
-	if (desc == NULL) {
-		R600_ERR("unknow format %d\n", state->format);
-	}
-	tmp = (struct r600_resource_texture *)texture;
+
 	if (tmp->depth && !tmp->is_flushing_texture) {
 		r600_texture_depth_flush(ctx, texture, TRUE);
 		tmp = tmp->flushed_depth_texture;
@@ -1029,7 +1025,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
 	rstate->val[7] = (S_03001C_DATA_FORMAT(format) |
 			  S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE));
 
-	return &resource->base;
+	return &view->base;
 }
 
 static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned count,
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 19bef31468d..fba2af8a6ac 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -996,10 +996,9 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
 							struct pipe_resource *texture,
 							const struct pipe_sampler_view *state)
 {
-	struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view);
+	struct r600_pipe_sampler_view *view = CALLOC_STRUCT(r600_pipe_sampler_view);
 	struct r600_pipe_resource_state *rstate;
-	const struct util_format_description *desc;
-	struct r600_resource_texture *tmp;
+	struct r600_resource_texture *tmp = (struct r600_resource_texture*)texture;
 	struct r600_resource *rbuffer;
 	unsigned format, endian;
 	uint32_t word4 = 0, yuv_format = 0, pitch = 0;
@@ -1007,43 +1006,42 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
 	struct r600_bo *bo[2];
 	unsigned width, height, depth, offset_level, last_level;
 
-	if (resource == NULL)
+	if (view == NULL)
 		return NULL;
-	rstate = &resource->state;
+	rstate = &view->state;
 
 	/* initialize base object */
-	resource->base = *state;
-	resource->base.texture = NULL;
+	view->base = *state;
+	view->base.texture = NULL;
 	pipe_reference(NULL, &texture->reference);
-	resource->base.texture = texture;
-	resource->base.reference.count = 1;
-	resource->base.context = ctx;
+	view->base.texture = texture;
+	view->base.reference.count = 1;
+	view->base.context = ctx;
 
 	swizzle[0] = state->swizzle_r;
 	swizzle[1] = state->swizzle_g;
 	swizzle[2] = state->swizzle_b;
 	swizzle[3] = state->swizzle_a;
+
 	format = r600_translate_texformat(ctx->screen, state->format,
 					  swizzle,
 					  &word4, &yuv_format);
 	if (format == ~0) {
 		format = 0;
 	}
-	desc = util_format_description(state->format);
-	if (desc == NULL) {
-		R600_ERR("unknown format %d\n", state->format);
-	}
-	tmp = (struct r600_resource_texture *)texture;
+
 	if (tmp->depth && !tmp->is_flushing_texture) {
 	        r600_texture_depth_flush(ctx, texture, TRUE);
 		tmp = tmp->flushed_depth_texture;
 	}
+
 	endian = r600_colorformat_endian_swap(format);
 
 	if (tmp->force_int_type) {
 		word4 &= C_038010_NUM_FORMAT_ALL;
 		word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT);
 	}
+
 	rbuffer = &tmp->resource;
 	bo[0] = rbuffer->bo;
 	bo[1] = rbuffer->bo;
@@ -1092,7 +1090,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
 	rstate->val[6] = (S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE) |
 			  S_038018_MAX_ANISO(4 /* max 16 samples */));
 
-	return &resource->base;
+	return &view->base;
 }
 
 static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count,

From 754ea4ea76f1d5ac6150090cffe2542bdf178d87 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Fri, 19 Aug 2011 22:43:08 +0200
Subject: [PATCH 450/600] r600g: finally enable float depth buffers on
 evergreen

---
 src/gallium/drivers/r600/evergreen_state.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 3c17bbad7cd..2135b8ac580 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -264,6 +264,9 @@ static uint32_t r600_translate_dbformat(enum pipe_format format)
 	case PIPE_FORMAT_Z24X8_UNORM:
 	case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
 		return V_028040_Z_24;
+	case PIPE_FORMAT_Z32_FLOAT:
+	case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED:
+		return V_028040_Z_32_FLOAT;
 	default:
 		return ~0U;
 	}
@@ -351,6 +354,7 @@ static uint32_t r600_translate_colorswap(enum pipe_format format)
 
 	case PIPE_FORMAT_R11G11B10_FLOAT:
 	case PIPE_FORMAT_R32_FLOAT:
+	case PIPE_FORMAT_Z32_FLOAT:
 	case PIPE_FORMAT_R16G16_FLOAT:
 	case PIPE_FORMAT_R16G16_UNORM:
 		return V_028C70_SWAP_STD;
@@ -360,6 +364,7 @@ static uint32_t r600_translate_colorswap(enum pipe_format format)
 	case PIPE_FORMAT_R16G16B16A16_UNORM:
 	case PIPE_FORMAT_R16G16B16A16_SNORM:
 	case PIPE_FORMAT_R16G16B16A16_FLOAT:
+	case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED:
 
 	/* 128-bit buffers. */
 	case PIPE_FORMAT_R32G32B32A32_FLOAT:
@@ -444,7 +449,11 @@ static uint32_t r600_translate_colorformat(enum pipe_format format)
 	case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
 		return V_028C70_COLOR_24_8;
 
+	case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED:
+		return V_028C70_COLOR_X24_8_32_FLOAT;
+
 	case PIPE_FORMAT_R32_FLOAT:
+	case PIPE_FORMAT_Z32_FLOAT:
 		return V_028C70_COLOR_32_FLOAT;
 
 	case PIPE_FORMAT_R16G16_FLOAT:
@@ -532,6 +541,7 @@ static uint32_t r600_colorformat_endian_swap(uint32_t colorformat)
 
 		case V_028C70_COLOR_32_32_FLOAT:
 		case V_028C70_COLOR_32_32:
+		case V_028C70_COLOR_X24_8_32_FLOAT:
 			return ENDIAN_8IN32;
 
 		/* 96-bit buffers. */
@@ -2113,6 +2123,7 @@ void evergreen_polygon_offset_update(struct r600_pipe_context *rctx)
 			offset_units *= 2.0f;
 			break;
 		case PIPE_FORMAT_Z32_FLOAT:
+		case PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED:
 			depth = -23;
 			offset_units *= 1.0f;
 			offset_db_fmt_cntl |= S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);

From db71537f01e0e6fd19e90bda6cfd727bb18832df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <maraeo@gmail.com>
Date: Fri, 19 Aug 2011 22:57:56 +0200
Subject: [PATCH 451/600] r600g: don't allocate separate depth and stencil for
 transfer textures on EG

The state tracker expects depth and stencil pixels interleaved.
Evergreen can bind an interleaved depth-stencil resource as a colorbuffer,
but not as a zbuffer.

The hardware can do the interleaving for us when decompressing.
---
 src/gallium/drivers/r600/r600_texture.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 5681dd88e50..e41fe11ac97 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -394,7 +394,8 @@ r600_texture_create_object(struct pipe_screen *screen,
 	rtex->real_format = base->format;
 
 	/* We must split depth and stencil into two separate buffers on Evergreen. */
-	if (r600_get_family_class(((struct r600_screen*)screen)->radeon) >= EVERGREEN &&
+	if (!(base->flags & R600_RESOURCE_FLAG_TRANSFER) &&
+	    r600_get_family_class(((struct r600_screen*)screen)->radeon) >= EVERGREEN &&
 	    util_format_is_depth_and_stencil(base->format)) {
 		struct pipe_resource stencil;
 		unsigned stencil_pitch_override = 0;

From 6a6441fc03b4f28bfd2619e4e6d260cf6ea82554 Mon Sep 17 00:00:00 2001
From: Lauri Kasanen <cand@gmx.com>
Date: Thu, 18 Aug 2011 10:21:25 +0300
Subject: [PATCH 452/600] driconf: Add the PP descriptions

Signed-off-by: Lauri Kasanen <cand@gmx.com>
Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/drivers/dri/common/xmlpool/options.h | 60 +++++++++++++++++++
 .../drivers/dri/common/xmlpool/t_options.h    | 30 ++++++++++
 2 files changed, 90 insertions(+)

diff --git a/src/mesa/drivers/dri/common/xmlpool/options.h b/src/mesa/drivers/dri/common/xmlpool/options.h
index d76595578c7..1e584ba086a 100644
--- a/src/mesa/drivers/dri/common/xmlpool/options.h
+++ b/src/mesa/drivers/dri/common/xmlpool/options.h
@@ -425,6 +425,66 @@ DRI_CONF_OPT_BEGIN(hyperz,bool,def) \
         DRI_CONF_DESC(sv,"Använd HyperZ för att maximera prestandan") \
 DRI_CONF_OPT_END
 
+#define DRI_CONF_PP_CELSHADE(def) \
+DRI_CONF_OPT_BEGIN_V(pp_celshade,enum,def,"0:1") \
+        DRI_CONF_DESC(en,"A post-processing filter to cel-shade the output") \
+        DRI_CONF_DESC(de,"A post-processing filter to cel-shade the output") \
+        DRI_CONF_DESC(es,"A post-processing filter to cel-shade the output") \
+        DRI_CONF_DESC(nl,"A post-processing filter to cel-shade the output") \
+        DRI_CONF_DESC(fr,"A post-processing filter to cel-shade the output") \
+        DRI_CONF_DESC(sv,"A post-processing filter to cel-shade the output") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NORED(def) \
+DRI_CONF_OPT_BEGIN_V(pp_nored,enum,def,"0:1") \
+        DRI_CONF_DESC(en,"A post-processing filter to remove the red channel") \
+        DRI_CONF_DESC(de,"A post-processing filter to remove the red channel") \
+        DRI_CONF_DESC(es,"A post-processing filter to remove the red channel") \
+        DRI_CONF_DESC(nl,"A post-processing filter to remove the red channel") \
+        DRI_CONF_DESC(fr,"A post-processing filter to remove the red channel") \
+        DRI_CONF_DESC(sv,"A post-processing filter to remove the red channel") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NOGREEN(def) \
+DRI_CONF_OPT_BEGIN_V(pp_nogreen,enum,def,"0:1") \
+        DRI_CONF_DESC(en,"A post-processing filter to remove the green channel") \
+        DRI_CONF_DESC(de,"A post-processing filter to remove the green channel") \
+        DRI_CONF_DESC(es,"A post-processing filter to remove the green channel") \
+        DRI_CONF_DESC(nl,"A post-processing filter to remove the green channel") \
+        DRI_CONF_DESC(fr,"A post-processing filter to remove the green channel") \
+        DRI_CONF_DESC(sv,"A post-processing filter to remove the green channel") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NOBLUE(def) \
+DRI_CONF_OPT_BEGIN_V(pp_noblue,enum,def,"0:1") \
+        DRI_CONF_DESC(en,"A post-processing filter to remove the blue channel") \
+        DRI_CONF_DESC(de,"A post-processing filter to remove the blue channel") \
+        DRI_CONF_DESC(es,"A post-processing filter to remove the blue channel") \
+        DRI_CONF_DESC(nl,"A post-processing filter to remove the blue channel") \
+        DRI_CONF_DESC(fr,"A post-processing filter to remove the blue channel") \
+        DRI_CONF_DESC(sv,"A post-processing filter to remove the blue channel") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_JIMENEZMLAA(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa,int,def, # min ":" # max ) \
+        DRI_CONF_DESC(en,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+        DRI_CONF_DESC(de,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+        DRI_CONF_DESC(es,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+        DRI_CONF_DESC(nl,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+        DRI_CONF_DESC(fr,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+        DRI_CONF_DESC(sv,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_JIMENEZMLAA_COLOR(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa_color,int,def, # min ":" # max ) \
+        DRI_CONF_DESC(en,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+        DRI_CONF_DESC(de,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+        DRI_CONF_DESC(es,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+        DRI_CONF_DESC(nl,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+        DRI_CONF_DESC(fr,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+        DRI_CONF_DESC(sv,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
+DRI_CONF_OPT_END
+
 #define DRI_CONF_MAX_TEXTURE_UNITS(def,min,max) \
 DRI_CONF_OPT_BEGIN_V(texture_units,int,def, # min ":" # max ) \
         DRI_CONF_DESC(en,"Number of texture units used") \
diff --git a/src/mesa/drivers/dri/common/xmlpool/t_options.h b/src/mesa/drivers/dri/common/xmlpool/t_options.h
index 5fd6ec65bf8..2427aa77f5b 100644
--- a/src/mesa/drivers/dri/common/xmlpool/t_options.h
+++ b/src/mesa/drivers/dri/common/xmlpool/t_options.h
@@ -191,6 +191,36 @@ DRI_CONF_OPT_BEGIN(hyperz,bool,def) \
         DRI_CONF_DESC(en,gettext("Use HyperZ to boost performance")) \
 DRI_CONF_OPT_END
 
+#define DRI_CONF_PP_CELSHADE(def) \
+DRI_CONF_OPT_BEGIN_V(pp_celshade,enum,def,"0:1") \
+        DRI_CONF_DESC(en,gettext("A post-processing filter to cel-shade the output")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NORED(def) \
+DRI_CONF_OPT_BEGIN_V(pp_nored,enum,def,"0:1") \
+        DRI_CONF_DESC(en,gettext("A post-processing filter to remove the red channel")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NOGREEN(def) \
+DRI_CONF_OPT_BEGIN_V(pp_nogreen,enum,def,"0:1") \
+        DRI_CONF_DESC(en,gettext("A post-processing filter to remove the green channel")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_NOBLUE(def) \
+DRI_CONF_OPT_BEGIN_V(pp_noblue,enum,def,"0:1") \
+        DRI_CONF_DESC(en,gettext("A post-processing filter to remove the blue channel")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_JIMENEZMLAA(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa,int,def, # min ":" # max ) \
+        DRI_CONF_DESC(en,gettext("Morphological anti-aliasing based on Jimenez\\\' MLAA. 0 to disable, 8 for default quality")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PP_JIMENEZMLAA_COLOR(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa_color,int,def, # min ":" # max ) \
+        DRI_CONF_DESC(en,gettext("Morphological anti-aliasing based on Jimenez\\\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps")) \
+DRI_CONF_OPT_END
+
 #define DRI_CONF_MAX_TEXTURE_UNITS(def,min,max) \
 DRI_CONF_OPT_BEGIN_V(texture_units,int,def, # min ":" # max ) \
         DRI_CONF_DESC(en,gettext("Number of texture units used")) \

From 421235d42ad9921fd45332ec7b33bcee5c1ad33d Mon Sep 17 00:00:00 2001
From: Lauri Kasanen <cand@gmx.com>
Date: Fri, 19 Aug 2011 18:23:10 +0300
Subject: [PATCH 453/600] st/dri: Bind the post-processing queue to dri

Signed-off-by: Lauri Kasanen <cand@gmx.com>
Signed-off-by: Brian Paul <brianp@vmware.com>
---
 .../state_trackers/dri/common/dri_context.c   | 24 +++++++++++++++++++
 .../state_trackers/dri/common/dri_context.h   |  3 +++
 .../state_trackers/dri/common/dri_screen.c    |  8 ++++++-
 src/gallium/state_trackers/dri/drm/dri2.c     | 13 +++++++---
 src/gallium/state_trackers/dri/sw/drisw.c     |  3 +++
 5 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/src/gallium/state_trackers/dri/common/dri_context.c b/src/gallium/state_trackers/dri/common/dri_context.c
index e6612b1911d..bc8dacba1b7 100644
--- a/src/gallium/state_trackers/dri/common/dri_context.c
+++ b/src/gallium/state_trackers/dri/common/dri_context.c
@@ -48,6 +48,16 @@ dri_init_extensions(struct dri_context *ctx)
    driInitExtensions(st->ctx, NULL, GL_FALSE);
 }
 
+static void
+dri_pp_query(struct dri_context *ctx)
+{
+   unsigned int i;
+
+   for (i = 0; i < PP_FILTERS; i++) {
+      ctx->pp_enabled[i] = driQueryOptioni(&ctx->optionCache, pp_filters[i].name);
+   }
+}
+
 GLboolean
 dri_create_context(gl_api api, const struct gl_config * visual,
 		   __DRIcontext * cPriv, void *sharedContextPrivate)
@@ -105,6 +115,11 @@ dri_create_context(gl_api api, const struct gl_config * visual,
    if (api == API_OPENGL)
       dri_init_extensions(ctx);
 
+   // Context successfully created. See if post-processing is requested.
+   dri_pp_query(ctx);
+
+   ctx->pp = pp_init(screen->base.screen, ctx->pp_enabled);
+
    return GL_TRUE;
 
  fail:
@@ -134,6 +149,8 @@ dri_destroy_context(__DRIcontext * cPriv)
    ctx->st->flush(ctx->st, 0, NULL);
    ctx->st->destroy(ctx->st);
 
+   if (ctx->pp) pp_free(ctx->pp);
+
    FREE(ctx);
 }
 
@@ -187,6 +204,13 @@ dri_make_current(__DRIcontext * cPriv,
 
    ctx->stapi->make_current(ctx->stapi, ctx->st, &draw->base, &read->base);
 
+   // This is ok to call here. If they are already init, it's a no-op.
+   if (draw->textures[ST_ATTACHMENT_BACK_LEFT] && draw->textures[ST_ATTACHMENT_DEPTH_STENCIL]
+      && ctx->pp)
+         pp_init_fbos(ctx->pp, draw->textures[ST_ATTACHMENT_BACK_LEFT]->width0,
+            draw->textures[ST_ATTACHMENT_BACK_LEFT]->height0,
+            draw->textures[ST_ATTACHMENT_DEPTH_STENCIL]);
+
    return GL_TRUE;
 }
 
diff --git a/src/gallium/state_trackers/dri/common/dri_context.h b/src/gallium/state_trackers/dri/common/dri_context.h
index 35105e861f9..cfc8e3345e5 100644
--- a/src/gallium/state_trackers/dri/common/dri_context.h
+++ b/src/gallium/state_trackers/dri/common/dri_context.h
@@ -34,6 +34,7 @@
 
 #include "pipe/p_compiler.h"
 #include "dri_wrapper.h"
+#include "postprocess/filters.h"
 
 struct pipe_context;
 struct pipe_fence;
@@ -61,6 +62,8 @@ struct dri_context
    /* gallium */
    struct st_api *stapi;
    struct st_context_iface *st;
+   struct pp_queue_t *pp;
+   unsigned int pp_enabled[PP_FILTERS];
 };
 
 static INLINE struct dri_context *
diff --git a/src/gallium/state_trackers/dri/common/dri_screen.c b/src/gallium/state_trackers/dri/common/dri_screen.c
index c9647945d6e..dcb6fdf8f3c 100644
--- a/src/gallium/state_trackers/dri/common/dri_screen.c
+++ b/src/gallium/state_trackers/dri/common/dri_screen.c
@@ -51,10 +51,16 @@ PUBLIC const char __driConfigOptions[] =
       DRI_CONF_SECTION_QUALITY
 /*       DRI_CONF_FORCE_S3TC_ENABLE(false) */
          DRI_CONF_ALLOW_LARGE_TEXTURES(1)
+         DRI_CONF_PP_CELSHADE(0)
+         DRI_CONF_PP_NORED(0)
+         DRI_CONF_PP_NOGREEN(0)
+         DRI_CONF_PP_NOBLUE(0)
+         DRI_CONF_PP_JIMENEZMLAA(0, 0, 32)
+         DRI_CONF_PP_JIMENEZMLAA_COLOR(0, 0, 32)
       DRI_CONF_SECTION_END
    DRI_CONF_END;
 
-static const uint __driNConfigOptions = 3;
+static const uint __driNConfigOptions = 9;
 
 static const __DRIconfig **
 dri_fill_in_modes(struct dri_screen *screen,
diff --git a/src/gallium/state_trackers/dri/drm/dri2.c b/src/gallium/state_trackers/dri/drm/dri2.c
index cf476056f41..6cf237577ec 100644
--- a/src/gallium/state_trackers/dri/drm/dri2.c
+++ b/src/gallium/state_trackers/dri/drm/dri2.c
@@ -44,12 +44,19 @@
  * DRI2 flush extension.
  */
 static void
-dri2_flush_drawable(__DRIdrawable *draw)
+dri2_flush_drawable(__DRIdrawable *dPriv)
 {
-   struct dri_context *ctx = dri_get_current(draw->driScreenPriv);
+   struct dri_context *ctx = dri_get_current(dPriv->driScreenPriv);
+   struct dri_drawable *drawable = dri_drawable(dPriv);
+
+   struct pipe_resource *ptex = drawable->textures[ST_ATTACHMENT_BACK_LEFT];
+
+   if (ctx) {
+      if (ptex && ctx->pp && drawable->textures[ST_ATTACHMENT_DEPTH_STENCIL])
+         pp_run(ctx->pp, ptex, ptex, drawable->textures[ST_ATTACHMENT_DEPTH_STENCIL]);
 
-   if (ctx)
       ctx->st->flush(ctx->st, 0, NULL);
+   }
 }
 
 static void
diff --git a/src/gallium/state_trackers/dri/sw/drisw.c b/src/gallium/state_trackers/dri/sw/drisw.c
index a1879a8f46a..082df55e8ea 100644
--- a/src/gallium/state_trackers/dri/sw/drisw.c
+++ b/src/gallium/state_trackers/dri/sw/drisw.c
@@ -136,6 +136,9 @@ drisw_swap_buffers(__DRIdrawable *dPriv)
    ptex = drawable->textures[ST_ATTACHMENT_BACK_LEFT];
 
    if (ptex) {
+      if (ctx->pp && drawable->textures[ST_ATTACHMENT_DEPTH_STENCIL])
+         pp_run(ctx->pp, ptex, ptex, drawable->textures[ST_ATTACHMENT_DEPTH_STENCIL]);
+
       ctx->st->flush(ctx->st, ST_FLUSH_FRONT, NULL);
 
       drisw_copy_to_front(dPriv, ptex);

From d2fdc58fe7ca59e7023ee955b59d92e3079cb277 Mon Sep 17 00:00:00 2001
From: Lauri Kasanen <cand@gmx.com>
Date: Thu, 18 Aug 2011 10:21:58 +0300
Subject: [PATCH 454/600] aux/Makefile,SConscript: Build PP

Signed-off-by: Lauri Kasanen <cand@gmx.com>
Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/Makefile   | 6 ++++++
 src/gallium/auxiliary/SConscript | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 7dae7bc908b..6634b392f76 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -64,6 +64,12 @@ C_SOURCES = \
 	pipebuffer/pb_bufmgr_pool.c \
 	pipebuffer/pb_bufmgr_slab.c \
 	pipebuffer/pb_validate.c \
+	postprocess/pp_celshade.c \
+	postprocess/pp_colors.c \
+	postprocess/pp_init.c \
+	postprocess/pp_mlaa.c \
+	postprocess/pp_run.c \
+	postprocess/pp_program.c \
 	rbug/rbug_connection.c \
 	rbug/rbug_context.c \
 	rbug/rbug_core.c \
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index d18f55f1644..6535b0fb82a 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -110,6 +110,12 @@ source = [
     'pipebuffer/pb_bufmgr_pool.c',
     'pipebuffer/pb_bufmgr_slab.c',
     'pipebuffer/pb_validate.c',
+    'postprocess/pp_celshade.c',
+    'postprocess/pp_colors.c',
+    'postprocess/pp_init.c',
+    'postprocess/pp_mlaa.c',
+    'postprocess/pp_run.c',
+    'postprocess/pp_program.c',
     'rbug/rbug_connection.c',
     'rbug/rbug_context.c',
     'rbug/rbug_core.c',

From e86e4cf128105db8b3e18d34b28040a26be6516c Mon Sep 17 00:00:00 2001
From: Lauri Kasanen <cand@gmx.com>
Date: Thu, 18 Aug 2011 10:22:12 +0300
Subject: [PATCH 455/600] pp: Docs

Signed-off-by: Lauri Kasanen <cand@gmx.com>
Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/postprocess/ADDING | 87 ++++++++++++++++++++++++
 1 file changed, 87 insertions(+)
 create mode 100644 src/gallium/auxiliary/postprocess/ADDING

diff --git a/src/gallium/auxiliary/postprocess/ADDING b/src/gallium/auxiliary/postprocess/ADDING
new file mode 100644
index 00000000000..3735835142a
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/ADDING
@@ -0,0 +1,87 @@
+How to add a new post-processing filter
+=======================================
+
+The Gallium post-processing queue works by passing the current screen to a fragment shader.
+These shaders may be written in any supported language, but are added here in TGSI text
+assembly.
+
+You can translate GLSL/ARB fairly easily via llvmpipe (LP_DEBUG=tgsi). I don't know the
+status of the D3D state tracker, but if/when that works, I'd assume HLSL would be possible
+too.
+
+
+
+Steps
+=====
+
+1. Add it to PP
+2. Make it known to PP
+3. Make it known to driconf
+4. ????
+5. Profit
+
+
+
+
+1. Add it to PP
+---------------
+
+Once you have the shader(s) in TGSI asm, put them to static const char arrays in a header
+file (see pp_colors.h).
+
+Add the filter's prototypes (main and init functions) to postprocess.h. This is mostly a
+copy-paste job with only changing the name.
+
+Then create a file containing empty main and init functions, named as you specified above.
+See pp_colors.c for an example.
+
+
+
+2. Make it known to PP
+----------------------
+
+Add your filter to filters.h, in a correct place. Placement is important, AA should usually
+be the last effect in the queue for example.
+
+Name is the config option your filter will be enabled by, both in driconf and as an env var.
+
+Inner temp means an intermediate framebuffer you may use in your filter to store
+results between passes. If you have a single-pass filter, request 0 of those.
+
+Shaders is the number of shaders your filter needs. The minimum is 2.
+
+
+You could also write the init and main functions now. If your filter is single-pass without
+a vertex shader and any other input than the main screen, you can use pp_nocolor as your
+main function as is.
+
+
+
+3. Make it known to driconf
+---------------------------
+
+First time outside of auxiliary/postprocess. First, add a suitable description to
+drivers/dri/common/xmlpool/t_options.h, and regenerate options.h by running make in that
+directory. Use the name you put into filters.h as the config option name.
+
+With driconf aware of the option, make Gallium aware of it too. Add it to
+state_trackers/dri/common/dri_screen.c in a proper section, specifying its default value and
+the accepted range (if applicable).
+
+Do check that __driNConfigOptions is still correct after the addition.
+
+
+
+4. ????
+-------
+
+Testing, praying, hookers, blow, sacrificial lambs...
+
+
+
+5. Profit
+---------
+
+Assuming you got here, sharing is caring. Send your filter to mesa-dev.
+
+

From e453289a77860634b8216dbde7e3631001dde44f Mon Sep 17 00:00:00 2001
From: Lauri Kasanen <cand@gmx.com>
Date: Thu, 18 Aug 2011 10:22:26 +0300
Subject: [PATCH 456/600] pp: Color filters

Signed-off-by: Lauri Kasanen <cand@gmx.com>
Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/postprocess/pp_colors.c | 80 +++++++++++++++++++
 src/gallium/auxiliary/postprocess/pp_colors.h | 69 ++++++++++++++++
 2 files changed, 149 insertions(+)
 create mode 100644 src/gallium/auxiliary/postprocess/pp_colors.c
 create mode 100644 src/gallium/auxiliary/postprocess/pp_colors.h

diff --git a/src/gallium/auxiliary/postprocess/pp_colors.c b/src/gallium/auxiliary/postprocess/pp_colors.c
new file mode 100644
index 00000000000..36bb1f552f5
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_colors.c
@@ -0,0 +1,80 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Lauri Kasanen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "postprocess/postprocess.h"
+#include "postprocess/pp_colors.h"
+#include "postprocess/pp_filters.h"
+
+/** The run function of the color filters */
+void
+pp_nocolor(struct pp_queue_t *ppq, struct pipe_resource *in,
+           struct pipe_resource *out, unsigned int n)
+{
+
+   struct program *p = ppq->p;
+
+   pp_filter_setup_in(p, in);
+   pp_filter_setup_out(p, out);
+
+   pp_filter_set_fb(p);
+   pp_filter_misc_state(p);
+
+   cso_single_sampler(p->cso, 0, &p->sampler_point);
+   cso_single_sampler_done(p->cso);
+   cso_set_fragment_sampler_views(p->cso, 1, &p->view);
+
+   cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][0]);
+   cso_set_fragment_shader_handle(p->cso, ppq->shaders[n][1]);
+
+   pp_filter_draw(p);
+   pp_filter_end_pass(p);
+}
+
+
+/* Init functions */
+
+void
+pp_nored_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val)
+{
+   ppq->shaders[n][1] = pp_tgsi_to_state(ppq->p->pipe, nored, false, "nored");
+}
+
+
+void
+pp_nogreen_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val)
+{
+   ppq->shaders[n][1] =
+      pp_tgsi_to_state(ppq->p->pipe, nogreen, false, "nogreen");
+}
+
+
+void
+pp_noblue_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val)
+{
+   ppq->shaders[n][1] =
+      pp_tgsi_to_state(ppq->p->pipe, noblue, false, "noblue");
+}
diff --git a/src/gallium/auxiliary/postprocess/pp_colors.h b/src/gallium/auxiliary/postprocess/pp_colors.h
new file mode 100644
index 00000000000..588cd2f0c52
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_colors.h
@@ -0,0 +1,69 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Lauri Kasanen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef PP_COLORS_H
+#define PP_COLORS_H
+
+static const char nored[] = "FRAG\n"
+   "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n"
+   "DCL IN[0], GENERIC[0], PERSPECTIVE\n"
+   "DCL OUT[0], COLOR\n"
+   "DCL SAMP[0]\n"
+   "DCL TEMP[0]\n"
+   "IMM FLT32 {    0.0000,     0.0000,     0.0000,     0.0000}\n"
+   "  0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D\n"
+   "  1: MOV TEMP[0].x, IMM[0].xxxx\n"
+   "  2: MOV OUT[0], TEMP[0]\n"
+   "  3: END\n";
+
+
+static const char nogreen[] = "FRAG\n"
+   "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n"
+   "DCL IN[0], GENERIC[0], PERSPECTIVE\n"
+   "DCL OUT[0], COLOR\n"
+   "DCL SAMP[0]\n"
+   "DCL TEMP[0]\n"
+   "IMM FLT32 {    0.0000,     0.0000,     0.0000,     0.0000}\n"
+   "  0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D\n"
+   "  1: MOV TEMP[0].y, IMM[0].xxxx\n"
+   "  2: MOV OUT[0], TEMP[0]\n"
+   "  3: END\n";
+
+
+static const char noblue[] = "FRAG\n"
+   "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n"
+   "DCL IN[0], GENERIC[0], PERSPECTIVE\n"
+   "DCL OUT[0], COLOR\n"
+   "DCL SAMP[0]\n"
+   "DCL TEMP[0]\n"
+   "IMM FLT32 {    0.0000,     0.0000,     0.0000,     0.0000}\n"
+   "  0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D\n"
+   "  1: MOV TEMP[0].z, IMM[0].xxxx\n"
+   "  2: MOV OUT[0], TEMP[0]\n"
+   "  3: END\n";
+
+#endif

From 6ff00c1afae51512d2680b096d5a9b4d618e82b9 Mon Sep 17 00:00:00 2001
From: Lauri Kasanen <cand@gmx.com>
Date: Thu, 18 Aug 2011 10:22:41 +0300
Subject: [PATCH 457/600] pp: Cel-shade filter

Signed-off-by: Lauri Kasanen <cand@gmx.com>
Signed-off-by: Brian Paul <brianp@vmware.com>
---
 .../auxiliary/postprocess/pp_celshade.c       | 38 +++++++++
 .../auxiliary/postprocess/pp_celshade.h       | 79 +++++++++++++++++++
 2 files changed, 117 insertions(+)
 create mode 100644 src/gallium/auxiliary/postprocess/pp_celshade.c
 create mode 100644 src/gallium/auxiliary/postprocess/pp_celshade.h

diff --git a/src/gallium/auxiliary/postprocess/pp_celshade.c b/src/gallium/auxiliary/postprocess/pp_celshade.c
new file mode 100644
index 00000000000..4454764ea84
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_celshade.c
@@ -0,0 +1,38 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Lauri Kasanen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "postprocess/postprocess.h"
+#include "postprocess/pp_celshade.h"
+#include "postprocess/pp_filters.h"
+
+/** Init function */
+void
+pp_celshade_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val)
+{
+   ppq->shaders[n][1] =
+      pp_tgsi_to_state(ppq->p->pipe, celshade, false, "celshade");
+}
diff --git a/src/gallium/auxiliary/postprocess/pp_celshade.h b/src/gallium/auxiliary/postprocess/pp_celshade.h
new file mode 100644
index 00000000000..536ac7f1f1c
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_celshade.h
@@ -0,0 +1,79 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Lauri Kasanen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef CELSHADE_H
+#define CELSHADE_H
+
+static const char celshade[] = "FRAG\n"
+   "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n"
+   "DCL IN[0], GENERIC[0], PERSPECTIVE\n"
+   "DCL OUT[0], COLOR\n"
+   "DCL SAMP[0]\n"
+   "DCL TEMP[0..4]\n"
+   "IMM FLT32 {    0.2126,     0.7152,     0.0722,     4.0000}\n"
+   "IMM FLT32 {    0.5000,     2.0000,     1.0000,    -0.1250}\n"
+   "IMM FLT32 {    0.2500,     0.1000,     0.1250,     3.0000}\n"
+   "  0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D\n"
+   "  1: DP3 TEMP[1].x, TEMP[0].xyzz, IMM[0]\n"
+   "  2: MUL TEMP[3].x, TEMP[1].xxxx, IMM[0].wwww\n"
+   "  3: ROUND TEMP[2].x, TEMP[3].xxxx\n"
+   "  4: MUL TEMP[3].x, TEMP[2].xxxx, IMM[2].xxxx\n"
+   "  5: MOV TEMP[2].x, TEMP[3].xxxx\n"
+   "  6: ADD TEMP[4].x, TEMP[1].xxxx, -TEMP[3].xxxx\n"
+   "  7: SGT TEMP[1].w, TEMP[4].xxxx, IMM[2].yyyy\n"
+   "  8: IF TEMP[1].wwww :19\n"
+   "  9:   ADD TEMP[4].y, TEMP[3].xxxx, IMM[2].yyyy\n"
+   " 10:   ADD TEMP[1].z, TEMP[1].xxxx, -TEMP[4].yyyy\n"
+   " 11:   ADD TEMP[1].y, TEMP[3].xxxx, IMM[2].zzzz\n"
+   " 12:   ADD TEMP[2].x, TEMP[1].yyyy, -TEMP[4].yyyy\n"
+   " 13:   RCP TEMP[4].y, TEMP[2].xxxx\n"
+   " 14:   MUL TEMP[2].x, TEMP[1].zzzz, TEMP[4].yyyy\n"
+   " 15:   MAD TEMP[1].y, -IMM[1].yyyy, TEMP[2].xxxx, IMM[2].wwww\n"
+   " 16:   MUL TEMP[1].z, TEMP[2].xxxx, TEMP[1].yyyy\n"
+   " 17:   MUL TEMP[1].y, TEMP[2].xxxx, TEMP[1].zzzz\n"
+   " 18:   MAD TEMP[2].x, TEMP[1].yyyy, IMM[2].zzzz, TEMP[3].xxxx\n"
+   " 19: ENDIF\n"
+   " 20: SLT TEMP[3].x, TEMP[4].xxxx, -IMM[2].yyyy\n"
+   " 21: IF TEMP[3].xxxx :34\n"
+   " 22:   ADD TEMP[3].x, TEMP[2].xxxx, -IMM[2].zzzz\n"
+   " 23:   ADD TEMP[4].x, TEMP[1].xxxx, -TEMP[3].xxxx\n"
+   " 24:   ADD TEMP[1].x, TEMP[2].xxxx, -IMM[2].yyyy\n"
+   " 25:   ADD TEMP[4].y, TEMP[1].xxxx, -TEMP[3].xxxx\n"
+   " 26:   RCP TEMP[3].x, TEMP[4].yyyy\n"
+   " 27:   MUL TEMP[1].x, TEMP[4].xxxx, TEMP[3].xxxx\n"
+   " 28:   MAD TEMP[4].x, -IMM[1].yyyy, TEMP[1].xxxx, IMM[2].wwww\n"
+   " 29:   MUL TEMP[3].x, TEMP[1].xxxx, TEMP[4].xxxx\n"
+   " 30:   MUL TEMP[4].x, TEMP[1].xxxx, TEMP[3].xxxx\n"
+   " 31:   ADD TEMP[3].x, IMM[1].zzzz, -TEMP[4].xxxx\n"
+   " 32:   MAD TEMP[1].x, TEMP[3].xxxx, -IMM[2].zzzz, TEMP[2].xxxx\n"
+   " 33:   MOV TEMP[2].x, TEMP[1].xxxx\n"
+   " 34: ENDIF\n"
+   " 35: MAD TEMP[1].x, TEMP[2].xxxx, IMM[1].yyyy, IMM[2].yyyy\n"
+   " 36: MUL OUT[0], TEMP[0], TEMP[1].xxxx\n"
+   " 37: END\n";
+
+#endif

From f951550d3ff60fc693fc2881482fe8e491147ad9 Mon Sep 17 00:00:00 2001
From: Lauri Kasanen <cand@gmx.com>
Date: Thu, 18 Aug 2011 10:22:55 +0300
Subject: [PATCH 458/600] pp: Add the MLAA areamap

The areamap contains precomputed data on different aliasing types.
It is necessary for good performance.

Signed-off-by: Lauri Kasanen <cand@gmx.com>
Signed-off-by: Brian Paul <brianp@vmware.com>
---
 .../auxiliary/postprocess/pp_mlaa_areamap.h   | 2821 +++++++++++++++++
 1 file changed, 2821 insertions(+)
 create mode 100644 src/gallium/auxiliary/postprocess/pp_mlaa_areamap.h

diff --git a/src/gallium/auxiliary/postprocess/pp_mlaa_areamap.h b/src/gallium/auxiliary/postprocess/pp_mlaa_areamap.h
new file mode 100644
index 00000000000..1446ff2cdf0
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_mlaa_areamap.h
@@ -0,0 +1,2821 @@
+/**
+ * Copyright (C) 2010 Jorge Jimenez (jorge@iryoku.com)
+ * Copyright (C) 2010 Belen Masia (bmasia@unizar.es)
+ * Copyright (C) 2010 Jose I. Echevarria (joseignacioechevarria@gmail.com)
+ * Copyright (C) 2010 Fernando Navarro (fernandn@microsoft.com)
+ * Copyright (C) 2010 Diego Gutierrez (diegog@unizar.es)
+ * Copyright (C) 2011 Lauri Kasanen (cand@gmx.com)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the following statement:
+ *
+ *       "Uses Jimenez's MLAA. Copyright (C) 2010 by Jorge Jimenez, Belen Masia,
+ *        Jose I. Echevarria, Fernando Navarro and Diego Gutierrez."
+ *
+ *       Only for use in the Mesa project, this point 2 is filled by naming the
+ *       technique Jimenez's MLAA in the Mesa config options.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
+ * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are
+ * those of the authors and should not be interpreted as representing official
+ * policies, either expressed or implied, of the copyright holders.
+ */
+
+#ifndef PP_MLAA_AREAMAP_H
+#define PP_MLAA_AREAMAP_H
+
+static const unsigned char areamap[] = {
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, 10,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 63, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 85, 0, 31, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 0,
+   31, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 51,
+   0, 21, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 51, 0, 21, 0, 4, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 102, 0, 63, 0, 36, 0, 15, 0, 3, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 102, 0,
+   63, 0, 36, 0, 15, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 0, 72,
+   0, 47, 0, 28, 0, 12, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 106, 0, 72, 0, 47, 0, 28, 0, 12, 0, 2, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 109, 0, 79, 0, 56, 0, 38, 0, 23, 0, 10, 0, 2,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 109, 0,
+   79, 0, 56, 0, 38, 0, 23, 0, 10, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 111, 0, 85,
+   0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0,
+   9, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 113, 0, 89, 0, 69, 0, 53, 0, 39, 0, 27, 0, 17,
+   0, 7, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 113, 0,
+   89, 0, 69, 0, 53, 0, 39, 0, 27, 0, 17, 0, 7, 0, 1, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 114, 0, 92,
+   0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15, 0, 7, 0, 1, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0,
+   23, 0, 15, 0, 7, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 115, 0, 95, 0, 78, 0, 63, 0, 51, 0, 39, 0, 30,
+   0, 21, 0, 13, 0, 6, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 115, 0,
+   95, 0, 78, 0, 63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 0, 1, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 0, 98,
+   0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26, 0, 19, 0, 12, 0, 5, 0, 1,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0,
+   35, 0, 26, 0, 19, 0, 12, 0, 5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 117, 0, 100, 0, 85, 0, 71, 0, 60, 0, 49, 0, 40,
+   0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 117, 0,
+   100, 0, 85, 0, 71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0,
+   5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 0, 102,
+   0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36, 0, 28, 0, 22, 0, 15, 0, 10,
+   0, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0,
+   44, 0, 36, 0, 28, 0, 22, 0, 15, 0, 10, 0, 4, 0, 1, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 119, 0, 103, 0, 89, 0, 77, 0, 67, 0, 57, 0, 48,
+   0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9, 0, 4, 0, 1, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0,
+   103, 0, 89, 0, 77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0,
+   14, 0, 9, 0, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 105,
+   0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44, 0, 37, 0, 30, 0, 24, 0, 18,
+   0, 13, 0, 8, 0, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0,
+   52, 0, 44, 0, 37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 0, 1, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 120, 0, 106, 0, 93, 0, 82, 0, 72, 0, 63, 0, 55,
+   0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17, 0, 12, 0, 8, 0, 3, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0,
+   106, 0, 93, 0, 82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0,
+   22, 0, 17, 0, 12, 0, 8, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 107,
+   0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51, 0, 44, 0, 37, 0, 31, 0, 26,
+   0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0,
+   58, 0, 51, 0, 44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0,
+   3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 120, 0, 108, 0, 97, 0, 86, 0, 77, 0, 69, 0, 61,
+   0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24, 0, 19, 0, 15, 0, 11, 0, 7,
+   0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0,
+   108, 0, 97, 0, 86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0,
+   29, 0, 24, 0, 19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 109,
+   0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56, 0, 50, 0, 43, 0, 38, 0, 32,
+   0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6, 0, 3, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0,
+   63, 0, 56, 0, 50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0,
+   10, 0, 6, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 99, 0, 90, 0, 81, 0, 73, 0, 66,
+   0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30, 0, 26, 0, 21, 0, 17, 0, 13,
+   0, 10, 0, 6, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0,
+   110, 0, 99, 0, 90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0,
+   35, 0, 30, 0, 26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110,
+   0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61, 0, 55, 0, 49, 0, 43, 0, 38,
+   0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13, 0, 9, 0, 6, 0, 3, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0,
+   68, 0, 61, 0, 55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0,
+   16, 0, 13, 0, 9, 0, 6, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 121, 0, 111, 0, 102, 0, 93, 0, 85, 0, 77, 0, 70,
+   0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36, 0, 31, 0, 27, 0, 23, 0, 19,
+   0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0,
+   111, 0, 102, 0, 93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0,
+   41, 0, 36, 0, 31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0,
+   2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112,
+   0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65, 0, 59, 0, 54, 0, 48, 0, 43,
+   0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18, 0, 15, 0, 11, 0, 8, 0, 5,
+   0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0,
+   72, 0, 65, 0, 59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0,
+   22, 0, 18, 0, 15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 103, 0, 95, 0, 87, 0, 80, 0, 74,
+   0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41, 0, 36, 0, 32, 0, 28, 0, 24,
+   0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0,
+   112, 0, 103, 0, 95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0,
+   46, 0, 41, 0, 36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0,
+   8, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113,
+   0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69, 0, 63, 0, 58, 0, 53, 0, 48,
+   0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23, 0, 20, 0, 17, 0, 13, 0, 10,
+   0, 7, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0,
+   75, 0, 69, 0, 63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0,
+   27, 0, 23, 0, 20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 105, 0, 97, 0, 90, 0, 83, 0, 77,
+   0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45, 0, 41, 0, 37, 0, 33, 0, 29,
+   0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0,
+   113, 0, 105, 0, 97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0,
+   50, 0, 45, 0, 41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0,
+   13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 114,
+   0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72, 0, 67, 0, 62, 0, 57, 0, 52,
+   0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28, 0, 24, 0, 21, 0, 18, 0, 15,
+   0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0,
+   78, 0, 72, 0, 67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0,
+   31, 0, 28, 0, 24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0,
+   2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 123, 0, 114, 0, 106, 0, 99, 0, 92, 0, 86, 0, 80,
+   0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34,
+   0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 9, 0, 7, 0, 4,
+   0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0,
+   114, 0, 106, 0, 99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0,
+   54, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0,
+   17, 0, 15, 0, 12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115,
+   0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75, 0, 70, 0, 65, 0, 60, 0, 55,
+   0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32, 0, 29, 0, 26, 0, 22, 0, 20,
+   0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0,
+   81, 0, 75, 0, 70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0,
+   36, 0, 32, 0, 29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0,
+   6, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 101, 0, 94, 0, 88, 0, 82,
+   0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53, 0, 49, 0, 45, 0, 41, 0, 37,
+   0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19, 0, 16, 0, 13, 0, 11, 0, 8,
+   0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0,
+   115, 0, 108, 0, 101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0,
+   57, 0, 53, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0,
+   22, 0, 19, 0, 16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115,
+   0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78, 0, 73, 0, 68, 0, 63, 0, 59,
+   0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 33, 0, 30, 0, 26, 0, 24,
+   0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0,
+   83, 0, 78, 0, 73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0,
+   39, 0, 36, 0, 33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0,
+   10, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 123, 0, 116, 0, 109, 0, 102, 0, 96, 0, 90, 0, 85,
+   0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56, 0, 52, 0, 48, 0, 45, 0, 41,
+   0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12,
+   0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0,
+   116, 0, 109, 0, 102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0,
+   60, 0, 56, 0, 52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0,
+   25, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0,
+   1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 0, 63, 0, 85, 0, 95, 0, 102,
+   0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117, 0, 118, 0,
+      119,
+   0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0, 121, 0, 122, 0,
+      122,
+   0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0, 123, 0, 62, 0,
+      63,
+   0, 85, 0, 95, 0, 102, 0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0,
+      116,
+   0, 117, 0, 118, 0, 119, 0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0,
+      121,
+   0, 121, 0, 122, 0, 122, 0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0,
+      123,
+   0, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 31, 31, 0, 63, 0, 85, 0, 95, 0, 102, 0, 106,
+   0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117, 0, 118, 0, 119, 0,
+      119,
+   0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0, 121, 0, 122, 0, 122, 0,
+      122,
+   0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0, 123, 31, 31, 0, 63, 0,
+      85,
+   0, 95, 0, 102, 0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0,
+      117,
+   0, 118, 0, 119, 0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0,
+      121,
+   0, 122, 0, 122, 0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0,
+      123,
+   0, 0, 0, 10, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0, 89, 0, 92,
+   0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0, 108, 0,
+      109,
+   0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0, 114, 0, 114, 0,
+      115,
+   0, 115, 0, 115, 0, 116, 0, 63, 0, 20, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79,
+   0, 85, 0, 89, 0, 92, 0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106,
+   0, 107, 0, 108, 0, 109, 0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0,
+      113,
+   0, 114, 0, 114, 0, 115, 0, 115, 0, 115, 0, 116, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 0,
+   10, 10, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0, 89, 0, 92, 0, 95,
+   0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0, 108, 0, 109, 0,
+      110,
+   0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0, 114, 0, 114, 0, 115, 0,
+      115,
+   0, 115, 0, 116, 63, 0, 10, 10, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85,
+   0, 89, 0, 92, 0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107,
+   0, 108, 0, 109, 0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0,
+      114,
+   0, 114, 0, 115, 0, 115, 0, 115, 0, 116, 0, 0, 0, 0, 0, 6, 0, 21, 0, 36,
+   0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0, 87, 0, 89,
+   0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0, 102, 0, 103,
+   0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0, 109, 0, 85, 0,
+      31,
+   0, 12, 0, 21, 0, 36, 0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81,
+   0, 85, 0, 87, 0, 89, 0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100,
+   0, 102, 0, 102, 0, 103, 0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0,
+      108,
+   0, 109, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 85, 0, 31, 0, 6, 6, 0, 21, 0, 36, 0, 47,
+   0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0, 87, 0, 89, 0, 92,
+   0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0, 102, 0, 103, 0, 104,
+   0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0, 109, 85, 0, 31, 0, 6, 6,
+   0, 21, 0, 36, 0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85,
+   0, 87, 0, 89, 0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102,
+   0, 102, 0, 103, 0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0,
+      109,
+   0, 0, 0, 0, 0, 0, 0, 4, 0, 15, 0, 28, 0, 38, 0, 46, 0, 53, 0, 58,
+   0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0, 86, 0, 88,
+   0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, 99, 0, 100,
+   0, 101, 0, 102, 0, 102, 0, 95, 0, 51, 0, 21, 0, 8, 0, 15, 0, 28, 0, 38,
+   0, 46, 0, 53, 0, 58, 0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82,
+   0, 85, 0, 86, 0, 88, 0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97,
+   0, 98, 0, 99, 0, 100, 0, 101, 0, 102, 0, 102, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0,
+   51, 0, 21, 0, 4, 4, 0, 15, 0, 28, 0, 38, 0, 46, 0, 53, 0, 58, 0, 63,
+   0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0, 86, 0, 88, 0, 90,
+   0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, 99, 0, 100, 0, 101,
+   0, 102, 0, 102, 95, 0, 51, 0, 21, 0, 4, 4, 0, 15, 0, 28, 0, 38, 0, 46,
+   0, 53, 0, 58, 0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85,
+   0, 86, 0, 88, 0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98,
+   0, 99, 0, 100, 0, 101, 0, 102, 0, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,
+   0, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0, 63, 0, 67,
+   0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0, 86, 0, 87,
+   0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 0, 102, 0, 63,
+   0, 36, 0, 15, 0, 6, 0, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55,
+   0, 60, 0, 63, 0, 67, 0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83,
+   0, 85, 0, 86, 0, 87, 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95,
+   0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 102, 0, 63, 0, 36, 0, 15, 0, 3, 3, 0, 12,
+   0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0, 63, 0, 67, 0, 70,
+   0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0, 86, 0, 87, 0, 89,
+   0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 102, 0, 63, 0, 36, 0,
+   15, 0, 3, 3, 0, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60,
+   0, 63, 0, 67, 0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85,
+   0, 86, 0, 87, 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 10, 0, 19, 0, 27, 0, 34,
+   0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0, 69, 0, 71,
+   0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0, 86, 0, 87,
+   0, 88, 0, 89, 0, 90, 0, 106, 0, 72, 0, 47, 0, 28, 0, 12, 0, 4, 0, 10,
+   0, 19, 0, 27, 0, 34, 0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63,
+   0, 66, 0, 69, 0, 71, 0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83,
+   0, 85, 0, 86, 0, 87, 0, 88, 0, 89, 0, 90, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 0,
+   72, 0, 47, 0, 28, 0, 12, 0, 2, 2, 0, 10, 0, 19, 0, 27, 0, 34, 0, 39,
+   0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0, 69, 0, 71, 0, 73,
+   0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0, 86, 0, 87, 0, 88,
+   0, 89, 0, 90, 106, 0, 72, 0, 47, 0, 28, 0, 12, 0, 2, 2, 0, 10, 0, 19,
+   0, 27, 0, 34, 0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66,
+   0, 69, 0, 71, 0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85,
+   0, 86, 0, 87, 0, 88, 0, 89, 0, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 2, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0, 44, 0, 48,
+   0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0, 72, 0, 74,
+   0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 0, 109, 0, 79,
+   0, 56, 0, 38, 0, 23, 0, 10, 0, 4, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35,
+   0, 40, 0, 44, 0, 48, 0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68,
+   0, 70, 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83,
+   0, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 109, 0, 79, 0, 56, 0, 38, 0, 23, 0, 10, 0,
+   2, 2, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0, 44, 0, 48, 0, 52,
+   0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0, 72, 0, 74, 0, 75,
+   0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 109, 0, 79, 0, 56, 0,
+   38, 0, 23, 0, 10, 0, 2, 2, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40,
+   0, 44, 0, 48, 0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70,
+   0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 7, 0, 15,
+   0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0, 53, 0, 56,
+   0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0, 74, 0, 75,
+   0, 77, 0, 78, 0, 79, 0, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9,
+   0, 4, 0, 7, 0, 15, 0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47,
+   0, 51, 0, 53, 0, 56, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71,
+   0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 79, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 111, 0,
+   85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 0, 2, 2, 0, 7, 0, 15, 0, 21,
+   0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0, 53, 0, 56, 0, 59,
+   0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0, 74, 0, 75, 0, 77,
+   0, 78, 0, 79, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 0, 2, 2,
+   0, 7, 0, 15, 0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51,
+   0, 53, 0, 56, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72,
+   0, 74, 0, 75, 0, 77, 0, 78, 0, 79, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 1, 0, 7, 0, 13, 0, 19, 0, 24, 0, 28, 0, 33,
+   0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0, 59, 0, 61,
+   0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 0, 113, 0, 89,
+   0, 69, 0, 53, 0, 39, 0, 27, 0, 17, 0, 7, 0, 2, 0, 7, 0, 13, 0, 19,
+   0, 24, 0, 28, 0, 33, 0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55,
+   0, 57, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73,
+   0, 74, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 113, 0, 89, 0, 69, 0, 53, 0, 39, 0, 27, 0,
+   17, 0, 7, 0, 1, 1, 0, 7, 0, 13, 0, 19, 0, 24, 0, 28, 0, 33, 0, 37,
+   0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0, 59, 0, 61, 0, 63,
+   0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 113, 0, 89, 0, 69, 0,
+   53, 0, 39, 0, 27, 0, 17, 0, 7, 0, 1, 1, 0, 7, 0, 13, 0, 19, 0, 24,
+   0, 28, 0, 33, 0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57,
+   0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+   0, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0, 40, 0, 43,
+   0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65,
+   0, 66, 0, 68, 0, 69, 0, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23,
+   0, 15, 0, 7, 0, 2, 0, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34,
+   0, 37, 0, 40, 0, 43, 0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60,
+   0, 62, 0, 63, 0, 65, 0, 66, 0, 68, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 114, 0,
+   92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15, 0, 7, 0, 1, 1, 0, 6,
+   0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0, 40, 0, 43, 0, 46,
+   0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 66,
+   0, 68, 0, 69, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15, 0,
+   7, 0, 1, 1, 0, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37,
+   0, 40, 0, 43, 0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62,
+   0, 63, 0, 65, 0, 66, 0, 68, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 5, 0, 11, 0, 15, 0, 20,
+   0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 51,
+   0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 115, 0, 95,
+   0, 78, 0, 63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 0, 2, 0, 5,
+   0, 11, 0, 15, 0, 20, 0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43,
+   0, 46, 0, 48, 0, 51, 0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63,
+   0, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 115, 0, 95, 0, 78, 0, 63, 0, 51, 0, 39, 0,
+   30, 0, 21, 0, 13, 0, 6, 0, 1, 1, 0, 5, 0, 11, 0, 15, 0, 20, 0, 24,
+   0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 51, 0, 53,
+   0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 115, 0, 95, 0, 78, 0,
+   63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 0, 1, 1, 0, 5, 0, 11,
+   0, 15, 0, 20, 0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46,
+   0, 48, 0, 51, 0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 1, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0, 29, 0, 32,
+   0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0, 54, 0, 55,
+   0, 57, 0, 59, 0, 60, 0, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35,
+   0, 26, 0, 19, 0, 12, 0, 5, 0, 2, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22,
+   0, 26, 0, 29, 0, 32, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50,
+   0, 52, 0, 54, 0, 55, 0, 57, 0, 59, 0, 60, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 0,
+   98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26, 0, 19, 0, 12, 0, 5, 0,
+   1, 1, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0, 29, 0, 32, 0, 35,
+   0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0, 54, 0, 55, 0, 57,
+   0, 59, 0, 60, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26, 0,
+   19, 0, 12, 0, 5, 0, 1, 1, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26,
+   0, 29, 0, 32, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52,
+   0, 54, 0, 55, 0, 57, 0, 59, 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 9,
+   0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0, 38, 0, 41,
+   0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 0, 117, 0, 100,
+   0, 85, 0, 71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5,
+   0, 2, 0, 4, 0, 9, 0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33,
+   0, 36, 0, 38, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55,
+   0, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 117, 0, 100, 0, 85, 0, 71, 0, 60, 0, 49, 0,
+   40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 0, 1, 1, 0, 4, 0, 9, 0, 13,
+   0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0, 38, 0, 41, 0, 43,
+   0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 117, 0, 100, 0, 85, 0,
+   71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 0, 1, 1,
+   0, 4, 0, 9, 0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36,
+   0, 38, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 8, 0, 12, 0, 16, 0, 19, 0, 23,
+   0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47,
+   0, 49, 0, 51, 0, 52, 0, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44,
+   0, 36, 0, 28, 0, 22, 0, 15, 0, 10, 0, 4, 0, 2, 0, 4, 0, 8, 0, 12,
+   0, 16, 0, 19, 0, 23, 0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41,
+   0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 52, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 0,
+   102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36, 0, 28, 0, 22, 0, 15, 0,
+   10, 0, 4, 0, 1, 1, 0, 4, 0, 8, 0, 12, 0, 16, 0, 19, 0, 23, 0, 26,
+   0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49,
+   0, 51, 0, 52, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36, 0,
+   28, 0, 22, 0, 15, 0, 10, 0, 4, 0, 1, 1, 0, 4, 0, 8, 0, 12, 0, 16,
+   0, 19, 0, 23, 0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43,
+   0, 45, 0, 47, 0, 49, 0, 51, 0, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+   0, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0, 30, 0, 32,
+   0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 0, 119, 0, 103,
+   0, 89, 0, 77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14,
+   0, 9, 0, 4, 0, 2, 0, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24,
+   0, 27, 0, 30, 0, 32, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47,
+   0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 103, 0, 89, 0, 77, 0, 67, 0, 57, 0,
+   48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9, 0, 4, 0, 1, 1, 0, 4,
+   0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0, 30, 0, 32, 0, 35,
+   0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 119, 0, 103, 0, 89, 0,
+   77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9, 0,
+   4, 0, 1, 1, 0, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27,
+   0, 30, 0, 32, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 7, 0, 11, 0, 14,
+   0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0, 37, 0, 39,
+   0, 41, 0, 43, 0, 45, 0, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52,
+   0, 44, 0, 37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 0, 2, 0, 3,
+   0, 7, 0, 11, 0, 14, 0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33,
+   0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0,
+   105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44, 0, 37, 0, 30, 0, 24, 0,
+   18, 0, 13, 0, 8, 0, 4, 0, 1, 1, 0, 3, 0, 7, 0, 11, 0, 14, 0, 17,
+   0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0, 37, 0, 39, 0, 41,
+   0, 43, 0, 45, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44, 0,
+   37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 0, 1, 1, 0, 3, 0, 7,
+   0, 11, 0, 14, 0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35,
+   0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 24,
+   0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 0, 120, 0, 106,
+   0, 93, 0, 82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22,
+   0, 17, 0, 12, 0, 8, 0, 3, 0, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16,
+   0, 19, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39,
+   0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 106, 0, 93, 0, 82, 0, 72, 0, 63, 0,
+   55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17, 0, 12, 0, 8, 0, 3, 0,
+   0, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 24, 0, 27,
+   0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 120, 0, 106, 0, 93, 0,
+   82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17, 0,
+   12, 0, 8, 0, 3, 0, 0, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19,
+   0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 6,
+   0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 32,
+   0, 34, 0, 36, 0, 38, 0, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58,
+   0, 51, 0, 44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3,
+   0, 0, 0, 3, 0, 6, 0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26,
+   0, 28, 0, 30, 0, 32, 0, 34, 0, 36, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0,
+   107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51, 0, 44, 0, 37, 0, 31, 0,
+   26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0, 0, 0, 3, 0, 6, 0, 10,
+   0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 32, 0, 34,
+   0, 36, 0, 38, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51, 0,
+   44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0, 0,
+   0, 3, 0, 6, 0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28,
+   0, 30, 0, 32, 0, 34, 0, 36, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0, 15, 0, 17,
+   0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 0, 120, 0, 108,
+   0, 97, 0, 86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29,
+   0, 24, 0, 19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 0, 3, 0, 6, 0, 9,
+   0, 12, 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33,
+   0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 108, 0, 97, 0, 86, 0, 77, 0, 69, 0,
+   61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24, 0, 19, 0, 15, 0, 11, 0,
+   7, 0, 3, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0, 15, 0, 17, 0, 20,
+   0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 120, 0, 108, 0, 97, 0,
+   86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24, 0,
+   19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 12,
+   0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0, 23, 0, 26,
+   0, 28, 0, 30, 0, 31, 0, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63,
+   0, 56, 0, 50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10,
+   0, 6, 0, 3, 0, 0, 0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19,
+   0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0,
+   109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56, 0, 50, 0, 43, 0, 38, 0,
+   32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6, 0, 3, 0, 0, 0, 0, 3,
+   0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0, 23, 0, 26, 0, 28,
+   0, 30, 0, 31, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56, 0,
+   50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6, 0,
+   3, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21,
+   0, 23, 0, 26, 0, 28, 0, 30, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 5, 0, 8, 0, 11,
+   0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 0, 121, 0, 110,
+   0, 99, 0, 90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35,
+   0, 30, 0, 26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 0, 3,
+   0, 5, 0, 8, 0, 11, 0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26,
+   0, 28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 99, 0, 90, 0, 81, 0, 73, 0,
+   66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30, 0, 26, 0, 21, 0, 17, 0,
+   13, 0, 10, 0, 6, 0, 3, 0, 0, 0, 0, 3, 0, 5, 0, 8, 0, 11, 0, 13,
+   0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 121, 0, 110, 0, 99, 0,
+   90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30, 0,
+   26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 0, 0, 3, 0, 5,
+   0, 8, 0, 11, 0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0, 17, 0, 20,
+   0, 22, 0, 24, 0, 25, 0, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68,
+   0, 61, 0, 55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16,
+   0, 13, 0, 9, 0, 6, 0, 3, 0, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13,
+   0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0,
+   110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61, 0, 55, 0, 49, 0, 43, 0,
+   38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13, 0, 9, 0, 6, 0, 3, 0,
+   0, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0, 17, 0, 20, 0, 22,
+   0, 24, 0, 25, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61, 0,
+   55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13, 0,
+   9, 0, 6, 0, 3, 0, 0, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15,
+   0, 17, 0, 20, 0, 22, 0, 24, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 5,
+   0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 0, 121, 0, 111,
+   0, 102, 0, 93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41,
+   0, 36, 0, 31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2,
+   0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21,
+   0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 111, 0, 102, 0, 93, 0, 85, 0, 77, 0,
+   70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36, 0, 31, 0, 27, 0, 23, 0,
+   19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 5, 0, 7,
+   0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 121, 0, 111, 0, 102, 0,
+   93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36, 0,
+   31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0, 0,
+   0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 14,
+   0, 16, 0, 18, 0, 20, 0, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72,
+   0, 65, 0, 59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22,
+   0, 18, 0, 15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 2, 0, 5, 0, 7,
+   0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0,
+   112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65, 0, 59, 0, 54, 0, 48, 0,
+   43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18, 0, 15, 0, 11, 0, 8, 0,
+   5, 0, 2, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 14, 0, 16,
+   0, 18, 0, 20, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65, 0,
+   59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18, 0,
+   15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10,
+   0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0, 122, 0, 112,
+   0, 103, 0, 95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46,
+   0, 41, 0, 36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8,
+   0, 5, 0, 2, 0, 0, 0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15,
+   0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 103, 0, 95, 0, 87, 0, 80, 0,
+   74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41, 0, 36, 0, 32, 0, 28, 0,
+   24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 0, 2,
+   0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 122, 0, 112, 0, 103, 0,
+   95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41, 0,
+   36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5, 0,
+   2, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 7, 0, 9,
+   0, 11, 0, 13, 0, 15, 0, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75,
+   0, 69, 0, 63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27,
+   0, 23, 0, 20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 2,
+   0, 4, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0,
+   113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69, 0, 63, 0, 58, 0, 53, 0,
+   48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23, 0, 20, 0, 17, 0, 13, 0,
+   10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 7, 0, 9, 0, 11,
+   0, 13, 0, 15, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69, 0,
+   63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23, 0,
+   20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 4,
+   0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 122, 0, 113,
+   0, 105, 0, 97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50,
+   0, 45, 0, 41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13,
+   0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10,
+   0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 105, 0, 97, 0, 90, 0, 83, 0,
+   77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45, 0, 41, 0, 37, 0, 33, 0,
+   29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0,
+   0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 122, 0, 113, 0, 105, 0,
+   97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45, 0,
+   41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10, 0,
+   7, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4,
+   0, 6, 0, 8, 0, 10, 0, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78,
+   0, 72, 0, 67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31,
+   0, 28, 0, 24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2,
+   0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0,
+   114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72, 0, 67, 0, 62, 0, 57, 0,
+   52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28, 0, 24, 0, 21, 0, 18, 0,
+   15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6,
+   0, 8, 0, 10, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72, 0,
+   67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28, 0,
+   24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0, 0,
+   0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 123, 0, 114,
+   0, 106, 0, 99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54,
+   0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17,
+   0, 15, 0, 12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 0, 2, 0, 4, 0, 6,
+   0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 114, 0, 106, 0, 99, 0, 92, 0, 86, 0,
+   80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49, 0, 45, 0, 41, 0, 37, 0,
+   34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 9, 0, 7, 0,
+   4, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 123, 0, 114, 0, 106, 0,
+   99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49, 0,
+   45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15, 0,
+   12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 2, 0, 4, 0, 6, 0, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81,
+   0, 75, 0, 70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36,
+   0, 32, 0, 29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6,
+   0, 4, 0, 2, 0, 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0,
+   115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75, 0, 70, 0, 65, 0, 60, 0,
+   55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32, 0, 29, 0, 26, 0, 22, 0,
+   20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 2,
+   0, 4, 0, 6, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75, 0,
+   70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32, 0,
+   29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4, 0,
+   2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 123, 0, 115,
+   0, 108, 0, 101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57,
+   0, 53, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22,
+   0, 19, 0, 16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 2,
+   0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 101, 0, 94, 0, 88, 0,
+   82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53, 0, 49, 0, 45, 0, 41, 0,
+   37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19, 0, 16, 0, 13, 0, 11, 0,
+   8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4, 123, 0, 115, 0, 108, 0,
+   101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53, 0,
+   49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19, 0,
+   16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 1, 0, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83,
+   0, 78, 0, 73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39,
+   0, 36, 0, 33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10,
+   0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0,
+   115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78, 0, 73, 0, 68, 0, 63, 0,
+   59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 33, 0, 30, 0, 26, 0,
+   24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8, 0, 6, 0, 4, 0, 2, 0,
+   0, 0, 0, 1, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78, 0,
+   73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0,
+   33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8, 0,
+   6, 0, 4, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 116,
+   0, 109, 0, 102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60,
+   0, 56, 0, 52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25,
+   0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 116, 0, 109, 0, 102, 0, 96, 0, 90, 0,
+   85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56, 0, 52, 0, 48, 0, 45, 0,
+   41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23, 0, 20, 0, 17, 0, 15, 0,
+   12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0, 0, 123, 0, 116, 0, 109, 0,
+   102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56, 0,
+   52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23, 0,
+   20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 0, 63, 0, 85, 0, 95, 0, 102, 0,
+   106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117, 0, 118, 0,
+      119, 0,
+   119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0, 121, 0, 122, 0,
+      122, 0,
+   122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0, 123, 0, 31, 31, 63,
+      0,
+   85, 0, 95, 0, 102, 0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116,
+      0,
+   117, 0, 118, 0, 119, 0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0,
+      121, 0,
+   121, 0, 122, 0, 122, 0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0,
+      123, 0,
+   123, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 63, 0, 85, 0, 95, 0, 102, 0, 106, 0,
+   109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117, 0, 118, 0, 119, 0,
+      119, 0,
+   120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0, 121, 0, 122, 0, 122, 0,
+      122, 0,
+   122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0, 123, 0, 31, 31, 63, 0, 85,
+      0,
+   95, 0, 102, 0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117,
+      0,
+   118, 0, 119, 0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0, 121, 0, 121, 0,
+      121, 0,
+   122, 0, 122, 0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0, 123, 0, 123, 0,
+      123, 0,
+   0, 0, 10, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0, 89, 0, 92, 0,
+   95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0, 108, 0, 109,
+      0,
+   110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0, 114, 0, 114, 0,
+      115, 0,
+   115, 0, 115, 0, 116, 0, 0, 63, 10, 10, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0,
+   85, 0, 89, 0, 92, 0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0,
+   107, 0, 108, 0, 109, 0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0,
+      113, 0,
+   114, 0, 114, 0, 115, 0, 115, 0, 115, 0, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 0,
+   20, 0, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0, 89, 0, 92, 0, 95, 0,
+   98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0, 108, 0, 109, 0, 110,
+      0,
+   110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0, 114, 0, 114, 0, 115, 0,
+      115, 0,
+   115, 0, 116, 0, 0, 63, 10, 10, 31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0,
+   89, 0, 92, 0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0,
+   108, 0, 109, 0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0, 113, 0, 113, 0,
+      114, 0,
+   114, 0, 115, 0, 115, 0, 115, 0, 116, 0, 0, 0, 0, 0, 6, 0, 21, 0, 36, 0,
+   47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0, 87, 0, 89, 0,
+   92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0, 102, 0, 103, 0,
+   104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0, 109, 0, 0, 85, 0,
+      31,
+   6, 6, 21, 0, 36, 0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0,
+   85, 0, 87, 0, 89, 0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0,
+   102, 0, 102, 0, 103, 0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0,
+      108, 0,
+   109, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 85, 0, 31, 0, 12, 0, 21, 0, 36, 0, 47, 0,
+   56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0, 87, 0, 89, 0, 92, 0,
+   93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0, 102, 0, 103, 0, 104, 0,
+   105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0, 109, 0, 0, 85, 0, 31, 6, 6,
+   21, 0, 36, 0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0,
+   87, 0, 89, 0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0,
+   102, 0, 103, 0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0, 108, 0, 108, 0,
+      109, 0,
+   0, 0, 0, 0, 0, 0, 4, 0, 15, 0, 28, 0, 38, 0, 46, 0, 53, 0, 58, 0,
+   63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0, 86, 0, 88, 0,
+   90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, 99, 0, 100, 0,
+   101, 0, 102, 0, 102, 0, 0, 95, 0, 51, 0, 21, 4, 4, 15, 0, 28, 0, 38, 0,
+   46, 0, 53, 0, 58, 0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0,
+   85, 0, 86, 0, 88, 0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0,
+   98, 0, 99, 0, 100, 0, 101, 0, 102, 0, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0,
+   51, 0, 21, 0, 8, 0, 15, 0, 28, 0, 38, 0, 46, 0, 53, 0, 58, 0, 63, 0,
+   68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0, 86, 0, 88, 0, 90, 0,
+   91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, 99, 0, 100, 0, 101, 0,
+   102, 0, 102, 0, 0, 95, 0, 51, 0, 21, 4, 4, 15, 0, 28, 0, 38, 0, 46, 0,
+   53, 0, 58, 0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0,
+   86, 0, 88, 0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0,
+   99, 0, 100, 0, 101, 0, 102, 0, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,
+   12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0, 63, 0, 67, 0,
+   70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0, 86, 0, 87, 0,
+   89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 0, 0, 102, 0, 63,
+   0, 36, 0, 15, 3, 3, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0,
+   60, 0, 63, 0, 67, 0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0,
+   85, 0, 86, 0, 87, 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0,
+   96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 102, 0, 63, 0, 36, 0, 15, 0, 6, 0, 12, 0,
+   23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0, 63, 0, 67, 0, 70, 0,
+   72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0, 86, 0, 87, 0, 89, 0,
+   90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 0, 0, 102, 0, 63, 0, 36,
+   0, 15, 3, 3, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0,
+   63, 0, 67, 0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0,
+   86, 0, 87, 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 10, 0, 19, 0, 27, 0, 34, 0,
+   39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0, 69, 0, 71, 0,
+   73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0, 86, 0, 87, 0,
+   88, 0, 89, 0, 90, 0, 0, 106, 0, 72, 0, 47, 0, 28, 0, 12, 2, 2, 10, 0,
+   19, 0, 27, 0, 34, 0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0,
+   66, 0, 69, 0, 71, 0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0,
+   85, 0, 86, 0, 87, 0, 88, 0, 89, 0, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 0,
+   72, 0, 47, 0, 28, 0, 12, 0, 4, 0, 10, 0, 19, 0, 27, 0, 34, 0, 39, 0,
+   44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0, 69, 0, 71, 0, 73, 0,
+   75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0, 86, 0, 87, 0, 88, 0,
+   89, 0, 90, 0, 0, 106, 0, 72, 0, 47, 0, 28, 0, 12, 2, 2, 10, 0, 19, 0,
+   27, 0, 34, 0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0,
+   69, 0, 71, 0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0,
+   86, 0, 87, 0, 88, 0, 89, 0, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 2, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0, 44, 0, 48, 0,
+   52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0, 72, 0, 74, 0,
+   75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 0, 0, 109, 0, 79,
+   0, 56, 0, 38, 0, 23, 0, 10, 2, 2, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0,
+   40, 0, 44, 0, 48, 0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0,
+   70, 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0,
+   85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 109, 0, 79, 0, 56, 0, 38, 0, 23, 0, 10, 0,
+   4, 0, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0, 44, 0, 48, 0, 52, 0,
+   55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0, 72, 0, 74, 0, 75, 0,
+   77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 0, 0, 109, 0, 79, 0, 56,
+   0, 38, 0, 23, 0, 10, 2, 2, 9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0,
+   44, 0, 48, 0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0,
+   72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 7, 0, 15, 0,
+   21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0, 53, 0, 56, 0,
+   59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0, 74, 0, 75, 0,
+   77, 0, 78, 0, 79, 0, 0, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9,
+   2, 2, 7, 0, 15, 0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0,
+   51, 0, 53, 0, 56, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0,
+   72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 79, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 111, 0,
+   85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 0, 4, 0, 7, 0, 15, 0, 21, 0,
+   26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0, 53, 0, 56, 0, 59, 0,
+   61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0, 74, 0, 75, 0, 77, 0,
+   78, 0, 79, 0, 0, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 2, 2,
+   7, 0, 15, 0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0,
+   53, 0, 56, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0,
+   74, 0, 75, 0, 77, 0, 78, 0, 79, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 1, 0, 7, 0, 13, 0, 19, 0, 24, 0, 28, 0, 33, 0,
+   37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0, 59, 0, 61, 0,
+   63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 0, 0, 113, 0, 89,
+   0, 69, 0, 53, 0, 39, 0, 27, 0, 17, 0, 7, 1, 1, 7, 0, 13, 0, 19, 0,
+   24, 0, 28, 0, 33, 0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0,
+   57, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0,
+   74, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 113, 0, 89, 0, 69, 0, 53, 0, 39, 0, 27, 0,
+   17, 0, 7, 0, 2, 0, 7, 0, 13, 0, 19, 0, 24, 0, 28, 0, 33, 0, 37, 0,
+   40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0, 59, 0, 61, 0, 63, 0,
+   65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 0, 0, 113, 0, 89, 0, 69,
+   0, 53, 0, 39, 0, 27, 0, 17, 0, 7, 1, 1, 7, 0, 13, 0, 19, 0, 24, 0,
+   28, 0, 33, 0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0,
+   59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
+   6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0, 40, 0, 43, 0,
+   46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0,
+   66, 0, 68, 0, 69, 0, 0, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23,
+   0, 15, 0, 7, 1, 1, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0,
+   37, 0, 40, 0, 43, 0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0,
+   62, 0, 63, 0, 65, 0, 66, 0, 68, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 114, 0,
+   92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15, 0, 7, 0, 2, 0, 6, 0,
+   12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0, 40, 0, 43, 0, 46, 0,
+   49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 66, 0,
+   68, 0, 69, 0, 0, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15,
+   0, 7, 1, 1, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0,
+   40, 0, 43, 0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0,
+   63, 0, 65, 0, 66, 0, 68, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 5, 0, 11, 0, 15, 0, 20, 0,
+   24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 51, 0,
+   53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 0, 115, 0, 95,
+   0, 78, 0, 63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 1, 1, 5, 0,
+   11, 0, 15, 0, 20, 0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0,
+   46, 0, 48, 0, 51, 0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0,
+   65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 115, 0, 95, 0, 78, 0, 63, 0, 51, 0, 39, 0,
+   30, 0, 21, 0, 13, 0, 6, 0, 2, 0, 5, 0, 11, 0, 15, 0, 20, 0, 24, 0,
+   28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 51, 0, 53, 0,
+   55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 0, 115, 0, 95, 0, 78,
+   0, 63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 1, 1, 5, 0, 11, 0,
+   15, 0, 20, 0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0,
+   48, 0, 51, 0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 1, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0, 29, 0, 32, 0,
+   35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0, 54, 0, 55, 0,
+   57, 0, 59, 0, 60, 0, 0, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35,
+   0, 26, 0, 19, 0, 12, 0, 5, 1, 1, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0,
+   26, 0, 29, 0, 32, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0,
+   52, 0, 54, 0, 55, 0, 57, 0, 59, 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 0,
+   98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26, 0, 19, 0, 12, 0, 5, 0,
+   2, 0, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0, 29, 0, 32, 0, 35, 0,
+   38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0, 54, 0, 55, 0, 57, 0,
+   59, 0, 60, 0, 0, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26,
+   0, 19, 0, 12, 0, 5, 1, 1, 5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0,
+   29, 0, 32, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0,
+   54, 0, 55, 0, 57, 0, 59, 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 9, 0,
+   13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0, 38, 0, 41, 0,
+   43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 0, 0, 117, 0, 100,
+   0, 85, 0, 71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5,
+   1, 1, 4, 0, 9, 0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0,
+   36, 0, 38, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0,
+   56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 117, 0, 100, 0, 85, 0, 71, 0, 60, 0, 49, 0,
+   40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 0, 2, 0, 4, 0, 9, 0, 13, 0,
+   17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0, 38, 0, 41, 0, 43, 0,
+   45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 0, 0, 117, 0, 100, 0, 85,
+   0, 71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 1, 1,
+   4, 0, 9, 0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0,
+   38, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 8, 0, 12, 0, 16, 0, 19, 0, 23, 0,
+   26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0,
+   49, 0, 51, 0, 52, 0, 0, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44,
+   0, 36, 0, 28, 0, 22, 0, 15, 0, 10, 0, 4, 1, 1, 4, 0, 8, 0, 12, 0,
+   16, 0, 19, 0, 23, 0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0,
+   43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 0,
+   102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36, 0, 28, 0, 22, 0, 15, 0,
+   10, 0, 4, 0, 2, 0, 4, 0, 8, 0, 12, 0, 16, 0, 19, 0, 23, 0, 26, 0,
+   29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0,
+   51, 0, 52, 0, 0, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36,
+   0, 28, 0, 22, 0, 15, 0, 10, 0, 4, 1, 1, 4, 0, 8, 0, 12, 0, 16, 0,
+   19, 0, 23, 0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0,
+   45, 0, 47, 0, 49, 0, 51, 0, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
+   4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0, 30, 0, 32, 0,
+   35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 0, 0, 119, 0, 103,
+   0, 89, 0, 77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14,
+   0, 9, 0, 4, 1, 1, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0,
+   27, 0, 30, 0, 32, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0,
+   48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 103, 0, 89, 0, 77, 0, 67, 0, 57, 0,
+   48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9, 0, 4, 0, 2, 0, 4, 0,
+   8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0, 30, 0, 32, 0, 35, 0,
+   37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 0, 0, 119, 0, 103, 0, 89,
+   0, 77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9,
+   0, 4, 1, 1, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0,
+   30, 0, 32, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 7, 0, 11, 0, 14, 0,
+   17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0, 37, 0, 39, 0,
+   41, 0, 43, 0, 45, 0, 0, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52,
+   0, 44, 0, 37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 1, 1, 3, 0,
+   7, 0, 11, 0, 14, 0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0,
+   35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0,
+   105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44, 0, 37, 0, 30, 0, 24, 0,
+   18, 0, 13, 0, 8, 0, 4, 0, 2, 0, 3, 0, 7, 0, 11, 0, 14, 0, 17, 0,
+   20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0, 37, 0, 39, 0, 41, 0,
+   43, 0, 45, 0, 0, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44,
+   0, 37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 1, 1, 3, 0, 7, 0,
+   11, 0, 14, 0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0,
+   37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 24, 0,
+   27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 0, 0, 120, 0, 106,
+   0, 93, 0, 82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22,
+   0, 17, 0, 12, 0, 8, 0, 3, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0,
+   19, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0,
+   41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 106, 0, 93, 0, 82, 0, 72, 0, 63, 0,
+   55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17, 0, 12, 0, 8, 0, 3, 0,
+   0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 24, 0, 27, 0,
+   29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 0, 0, 120, 0, 106, 0, 93,
+   0, 82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17,
+   0, 12, 0, 8, 0, 3, 0, 0, 3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0,
+   22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 6, 0,
+   10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 32, 0,
+   34, 0, 36, 0, 38, 0, 0, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58,
+   0, 51, 0, 44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3,
+   0, 0, 3, 0, 6, 0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0,
+   28, 0, 30, 0, 32, 0, 34, 0, 36, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0,
+   107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51, 0, 44, 0, 37, 0, 31, 0,
+   26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0, 0, 3, 0, 6, 0, 10, 0,
+   13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 32, 0, 34, 0,
+   36, 0, 38, 0, 0, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51,
+   0, 44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0,
+   3, 0, 6, 0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0,
+   30, 0, 32, 0, 34, 0, 36, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0, 15, 0, 17, 0,
+   20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 0, 0, 120, 0, 108,
+   0, 97, 0, 86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29,
+   0, 24, 0, 19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 3, 0, 6, 0, 9, 0,
+   12, 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0,
+   35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 108, 0, 97, 0, 86, 0, 77, 0, 69, 0,
+   61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24, 0, 19, 0, 15, 0, 11, 0,
+   7, 0, 3, 0, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0, 15, 0, 17, 0, 20, 0,
+   22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 0, 0, 120, 0, 108, 0, 97,
+   0, 86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24,
+   0, 19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0,
+   15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0, 23, 0, 26, 0,
+   28, 0, 30, 0, 31, 0, 0, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63,
+   0, 56, 0, 50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10,
+   0, 6, 0, 3, 0, 0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0,
+   21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0,
+   109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56, 0, 50, 0, 43, 0, 38, 0,
+   32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6, 0, 3, 0, 0, 0, 3, 0,
+   6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0, 23, 0, 26, 0, 28, 0,
+   30, 0, 31, 0, 0, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56,
+   0, 50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6,
+   0, 3, 0, 0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0,
+   23, 0, 26, 0, 28, 0, 30, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 5, 0, 8, 0, 11, 0,
+   13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 0, 0, 121, 0, 110,
+   0, 99, 0, 90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35,
+   0, 30, 0, 26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 3, 0,
+   5, 0, 8, 0, 11, 0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0,
+   28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 99, 0, 90, 0, 81, 0, 73, 0,
+   66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30, 0, 26, 0, 21, 0, 17, 0,
+   13, 0, 10, 0, 6, 0, 3, 0, 0, 0, 3, 0, 5, 0, 8, 0, 11, 0, 13, 0,
+   16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 0, 0, 121, 0, 110, 0, 99,
+   0, 90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30,
+   0, 26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 3, 0, 5, 0,
+   8, 0, 11, 0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0, 17, 0, 20, 0,
+   22, 0, 24, 0, 25, 0, 0, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68,
+   0, 61, 0, 55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16,
+   0, 13, 0, 9, 0, 6, 0, 3, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0,
+   15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0,
+   110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61, 0, 55, 0, 49, 0, 43, 0,
+   38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13, 0, 9, 0, 6, 0, 3, 0,
+   0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0, 17, 0, 20, 0, 22, 0,
+   24, 0, 25, 0, 0, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61,
+   0, 55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13,
+   0, 9, 0, 6, 0, 3, 0, 0, 2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0,
+   17, 0, 20, 0, 22, 0, 24, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 5, 0,
+   7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 0, 0, 121, 0, 111,
+   0, 102, 0, 93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41,
+   0, 36, 0, 31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2,
+   0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0,
+   23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 111, 0, 102, 0, 93, 0, 85, 0, 77, 0,
+   70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36, 0, 31, 0, 27, 0, 23, 0,
+   19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0, 0, 2, 0, 5, 0, 7, 0,
+   10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 0, 0, 121, 0, 111, 0, 102,
+   0, 93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36,
+   0, 31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0,
+   2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 14, 0,
+   16, 0, 18, 0, 20, 0, 0, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72,
+   0, 65, 0, 59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22,
+   0, 18, 0, 15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 2, 0, 5, 0, 7, 0,
+   10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0,
+   112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65, 0, 59, 0, 54, 0, 48, 0,
+   43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18, 0, 15, 0, 11, 0, 8, 0,
+   5, 0, 2, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 14, 0, 16, 0,
+   18, 0, 20, 0, 0, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65,
+   0, 59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18,
+   0, 15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0,
+   12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0, 0, 122, 0, 112,
+   0, 103, 0, 95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46,
+   0, 41, 0, 36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8,
+   0, 5, 0, 2, 0, 0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0,
+   17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 103, 0, 95, 0, 87, 0, 80, 0,
+   74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41, 0, 36, 0, 32, 0, 28, 0,
+   24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 2, 0,
+   5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0, 0, 122, 0, 112, 0, 103,
+   0, 95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41,
+   0, 36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5,
+   0, 2, 0, 0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 7, 0, 9, 0,
+   11, 0, 13, 0, 15, 0, 0, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75,
+   0, 69, 0, 63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27,
+   0, 23, 0, 20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 2, 0,
+   4, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0,
+   113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69, 0, 63, 0, 58, 0, 53, 0,
+   48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23, 0, 20, 0, 17, 0, 13, 0,
+   10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 2, 0, 4, 0, 7, 0, 9, 0, 11, 0,
+   13, 0, 15, 0, 0, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69,
+   0, 63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23,
+   0, 20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 2, 0, 4, 0,
+   7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 0, 122, 0, 113,
+   0, 105, 0, 97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50,
+   0, 45, 0, 41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13,
+   0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0,
+   12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 105, 0, 97, 0, 90, 0, 83, 0,
+   77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45, 0, 41, 0, 37, 0, 33, 0,
+   29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0,
+   0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 0, 122, 0, 113, 0, 105,
+   0, 97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45,
+   0, 41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10,
+   0, 7, 0, 5, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0,
+   6, 0, 8, 0, 10, 0, 0, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78,
+   0, 72, 0, 67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31,
+   0, 28, 0, 24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2,
+   0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0,
+   114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72, 0, 67, 0, 62, 0, 57, 0,
+   52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28, 0, 24, 0, 21, 0, 18, 0,
+   15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0, 0, 2, 0, 4, 0, 6, 0,
+   8, 0, 10, 0, 0, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72,
+   0, 67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28,
+   0, 24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0,
+   2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 0, 123, 0, 114,
+   0, 106, 0, 99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54,
+   0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17,
+   0, 15, 0, 12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0,
+   8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 114, 0, 106, 0, 99, 0, 92, 0, 86, 0,
+   80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49, 0, 45, 0, 41, 0, 37, 0,
+   34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 9, 0, 7, 0,
+   4, 0, 2, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 0, 123, 0, 114, 0, 106,
+   0, 99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49,
+   0, 45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15,
+   0, 12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   2, 0, 4, 0, 6, 0, 0, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81,
+   0, 75, 0, 70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36,
+   0, 32, 0, 29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6,
+   0, 4, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0,
+   115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75, 0, 70, 0, 65, 0, 60, 0,
+   55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32, 0, 29, 0, 26, 0, 22, 0,
+   20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4, 0, 2, 0, 0, 0, 2, 0,
+   4, 0, 6, 0, 0, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75,
+   0, 70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32,
+   0, 29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4,
+   0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 4, 0, 0, 123, 0, 115,
+   0, 108, 0, 101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57,
+   0, 53, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22,
+   0, 19, 0, 16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 2, 0,
+   4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 101, 0, 94, 0, 88, 0,
+   82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53, 0, 49, 0, 45, 0, 41, 0,
+   37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19, 0, 16, 0, 13, 0, 11, 0,
+   8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 2, 0, 4, 0, 0, 123, 0, 115, 0, 108,
+   0, 101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53,
+   0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19,
+   0, 16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 1, 0, 0, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83,
+   0, 78, 0, 73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39,
+   0, 36, 0, 33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10,
+   0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0,
+   115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78, 0, 73, 0, 68, 0, 63, 0,
+   59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 33, 0, 30, 0, 26, 0,
+   24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8, 0, 6, 0, 4, 0, 2, 0,
+   0, 0, 1, 0, 0, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78,
+   0, 73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36,
+   0, 33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8,
+   0, 6, 0, 4, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 116,
+   0, 109, 0, 102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60,
+   0, 56, 0, 52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25,
+   0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 116, 0, 109, 0, 102, 0, 96, 0, 90, 0,
+   85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56, 0, 52, 0, 48, 0, 45, 0,
+   41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23, 0, 20, 0, 17, 0, 15, 0,
+   12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0, 0, 0, 123, 0, 116, 0, 109,
+   0, 102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56,
+   0, 52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23,
+   0, 20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 31, 31, 63, 0, 85, 0, 95, 0, 102, 0, 106, 0, 109, 0,
+   111, 0, 113, 0, 114, 0, 115, 0, 116, 0, 117, 0, 118, 0, 119, 0, 119, 0,
+      120, 0,
+   120, 0, 120, 0, 121, 0, 121, 0, 121, 0, 121, 0, 122, 0, 122, 0, 122, 0,
+      122, 0,
+   122, 0, 123, 0, 123, 0, 123, 0, 123, 0, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 31,
+   0, 63, 0, 85, 0, 95, 0, 102, 0, 106, 0, 109, 0, 111, 0, 113, 0, 114, 0,
+      115,
+   0, 116, 0, 117, 0, 118, 0, 119, 0, 119, 0, 120, 0, 120, 0, 120, 0, 121, 0,
+      121,
+   0, 121, 0, 121, 0, 122, 0, 122, 0, 122, 0, 122, 0, 122, 0, 123, 0, 123, 0,
+      123,
+   0, 123, 0, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 10, 10,
+   31, 0, 51, 0, 63, 0, 72, 0, 79, 0, 85, 0, 89, 0, 92, 0, 95, 0, 98, 0,
+   100, 0, 102, 0, 103, 0, 105, 0, 106, 0, 107, 0, 108, 0, 109, 0, 110, 0,
+      110, 0,
+   111, 0, 112, 0, 112, 0, 113, 0, 113, 0, 114, 0, 114, 0, 115, 0, 115, 0,
+      115, 0,
+   116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 63, 0, 10, 10, 0, 31, 0, 51, 0, 63, 0, 72,
+   0, 79, 0, 85, 0, 89, 0, 92, 0, 95, 0, 98, 0, 100, 0, 102, 0, 103, 0, 105,
+   0, 106, 0, 107, 0, 108, 0, 109, 0, 110, 0, 110, 0, 111, 0, 112, 0, 112, 0,
+      113,
+   0, 113, 0, 114, 0, 114, 0, 115, 0, 115, 0, 115, 0, 116, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 85, 0, 31, 6, 6, 21, 0, 36, 0, 47, 0, 56, 0,
+   63, 0, 69, 0, 74, 0, 78, 0, 81, 0, 85, 0, 87, 0, 89, 0, 92, 0, 93, 0,
+   95, 0, 97, 0, 98, 0, 99, 0, 100, 0, 102, 0, 102, 0, 103, 0, 104, 0, 105, 0,
+   106, 0, 106, 0, 107, 0, 108, 0, 108, 0, 109, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 0,
+   31, 0, 6, 6, 0, 21, 0, 36, 0, 47, 0, 56, 0, 63, 0, 69, 0, 74, 0, 78,
+   0, 81, 0, 85, 0, 87, 0, 89, 0, 92, 0, 93, 0, 95, 0, 97, 0, 98, 0, 99,
+   0, 100, 0, 102, 0, 102, 0, 103, 0, 104, 0, 105, 0, 106, 0, 106, 0, 107, 0,
+      108,
+   0, 108, 0, 109, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 51,
+   0, 21, 4, 4, 15, 0, 28, 0, 38, 0, 46, 0, 53, 0, 58, 0, 63, 0, 68, 0,
+   71, 0, 75, 0, 77, 0, 80, 0, 82, 0, 85, 0, 86, 0, 88, 0, 90, 0, 91, 0,
+   93, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, 99, 0, 100, 0, 101, 0, 102, 0,
+   102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 51, 0, 21, 0, 4, 4, 0, 15, 0, 28,
+   0, 38, 0, 46, 0, 53, 0, 58, 0, 63, 0, 68, 0, 71, 0, 75, 0, 77, 0, 80,
+   0, 82, 0, 85, 0, 86, 0, 88, 0, 90, 0, 91, 0, 93, 0, 94, 0, 95, 0, 96,
+   0, 97, 0, 98, 0, 99, 0, 100, 0, 101, 0, 102, 0, 102, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 102, 0, 63, 0, 36, 0, 15, 3, 3, 12, 0, 23, 0,
+   31, 0, 39, 0, 45, 0, 51, 0, 55, 0, 60, 0, 63, 0, 67, 0, 70, 0, 72, 0,
+   75, 0, 77, 0, 79, 0, 81, 0, 83, 0, 85, 0, 86, 0, 87, 0, 89, 0, 90, 0,
+   91, 0, 92, 0, 93, 0, 94, 0, 95, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 102, 0,
+   63, 0, 36, 0, 15, 0, 3, 3, 0, 12, 0, 23, 0, 31, 0, 39, 0, 45, 0, 51,
+   0, 55, 0, 60, 0, 63, 0, 67, 0, 70, 0, 72, 0, 75, 0, 77, 0, 79, 0, 81,
+   0, 83, 0, 85, 0, 86, 0, 87, 0, 89, 0, 90, 0, 91, 0, 92, 0, 93, 0, 94,
+   0, 95, 0, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 0, 72,
+   0, 47, 0, 28, 0, 12, 2, 2, 10, 0, 19, 0, 27, 0, 34, 0, 39, 0, 44, 0,
+   49, 0, 53, 0, 57, 0, 60, 0, 63, 0, 66, 0, 69, 0, 71, 0, 73, 0, 75, 0,
+   77, 0, 79, 0, 80, 0, 82, 0, 83, 0, 85, 0, 86, 0, 87, 0, 88, 0, 89, 0,
+   90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 106, 0, 72, 0, 47, 0, 28, 0, 12, 0, 2, 2,
+   0, 10, 0, 19, 0, 27, 0, 34, 0, 39, 0, 44, 0, 49, 0, 53, 0, 57, 0, 60,
+   0, 63, 0, 66, 0, 69, 0, 71, 0, 73, 0, 75, 0, 77, 0, 79, 0, 80, 0, 82,
+   0, 83, 0, 85, 0, 86, 0, 87, 0, 88, 0, 89, 0, 90, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 109, 0, 79, 0, 56, 0, 38, 0, 23, 0, 10, 2, 2,
+   9, 0, 17, 0, 23, 0, 30, 0, 35, 0, 40, 0, 44, 0, 48, 0, 52, 0, 55, 0,
+   58, 0, 61, 0, 63, 0, 66, 0, 68, 0, 70, 0, 72, 0, 74, 0, 75, 0, 77, 0,
+   78, 0, 80, 0, 81, 0, 82, 0, 83, 0, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 109, 0,
+   79, 0, 56, 0, 38, 0, 23, 0, 10, 0, 2, 2, 0, 9, 0, 17, 0, 23, 0, 30,
+   0, 35, 0, 40, 0, 44, 0, 48, 0, 52, 0, 55, 0, 58, 0, 61, 0, 63, 0, 66,
+   0, 68, 0, 70, 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 80, 0, 81, 0, 82,
+   0, 83, 0, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 111, 0, 85,
+   0, 63, 0, 46, 0, 31, 0, 19, 0, 9, 2, 2, 7, 0, 15, 0, 21, 0, 26, 0,
+   31, 0, 36, 0, 40, 0, 44, 0, 47, 0, 51, 0, 53, 0, 56, 0, 59, 0, 61, 0,
+   63, 0, 65, 0, 67, 0, 69, 0, 71, 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0,
+   79, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 111, 0, 85, 0, 63, 0, 46, 0, 31, 0, 19, 0,
+   9, 0, 2, 2, 0, 7, 0, 15, 0, 21, 0, 26, 0, 31, 0, 36, 0, 40, 0, 44,
+   0, 47, 0, 51, 0, 53, 0, 56, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 69,
+   0, 71, 0, 72, 0, 74, 0, 75, 0, 77, 0, 78, 0, 79, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 113, 0, 89, 0, 69, 0, 53, 0, 39, 0, 27, 0, 17,
+   0, 7, 1, 1, 7, 0, 13, 0, 19, 0, 24, 0, 28, 0, 33, 0, 37, 0, 40, 0,
+   44, 0, 47, 0, 50, 0, 52, 0, 55, 0, 57, 0, 59, 0, 61, 0, 63, 0, 65, 0,
+   67, 0, 68, 0, 70, 0, 71, 0, 73, 0, 74, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 113, 0,
+   89, 0, 69, 0, 53, 0, 39, 0, 27, 0, 17, 0, 7, 0, 1, 1, 0, 7, 0, 13,
+   0, 19, 0, 24, 0, 28, 0, 33, 0, 37, 0, 40, 0, 44, 0, 47, 0, 50, 0, 52,
+   0, 55, 0, 57, 0, 59, 0, 61, 0, 63, 0, 65, 0, 67, 0, 68, 0, 70, 0, 71,
+   0, 73, 0, 74, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 114, 0, 92,
+   0, 74, 0, 58, 0, 45, 0, 34, 0, 23, 0, 15, 0, 7, 1, 1, 6, 0, 12, 0,
+   17, 0, 22, 0, 26, 0, 30, 0, 34, 0, 37, 0, 40, 0, 43, 0, 46, 0, 49, 0,
+   51, 0, 54, 0, 56, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 66, 0, 68, 0,
+   69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 114, 0, 92, 0, 74, 0, 58, 0, 45, 0, 34, 0,
+   23, 0, 15, 0, 7, 0, 1, 1, 0, 6, 0, 12, 0, 17, 0, 22, 0, 26, 0, 30,
+   0, 34, 0, 37, 0, 40, 0, 43, 0, 46, 0, 49, 0, 51, 0, 54, 0, 56, 0, 58,
+   0, 60, 0, 62, 0, 63, 0, 65, 0, 66, 0, 68, 0, 69, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 115, 0, 95, 0, 78, 0, 63, 0, 51, 0, 39, 0, 30,
+   0, 21, 0, 13, 0, 6, 1, 1, 5, 0, 11, 0, 15, 0, 20, 0, 24, 0, 28, 0,
+   31, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48, 0, 51, 0, 53, 0, 55, 0,
+   57, 0, 58, 0, 60, 0, 62, 0, 63, 0, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 115, 0,
+   95, 0, 78, 0, 63, 0, 51, 0, 39, 0, 30, 0, 21, 0, 13, 0, 6, 0, 1, 1,
+   0, 5, 0, 11, 0, 15, 0, 20, 0, 24, 0, 28, 0, 31, 0, 35, 0, 38, 0, 41,
+   0, 43, 0, 46, 0, 48, 0, 51, 0, 53, 0, 55, 0, 57, 0, 58, 0, 60, 0, 62,
+   0, 63, 0, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 0, 98,
+   0, 81, 0, 68, 0, 55, 0, 44, 0, 35, 0, 26, 0, 19, 0, 12, 0, 5, 1, 1,
+   5, 0, 10, 0, 14, 0, 18, 0, 22, 0, 26, 0, 29, 0, 32, 0, 35, 0, 38, 0,
+   41, 0, 43, 0, 46, 0, 48, 0, 50, 0, 52, 0, 54, 0, 55, 0, 57, 0, 59, 0,
+   60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 116, 0, 98, 0, 81, 0, 68, 0, 55, 0, 44, 0,
+   35, 0, 26, 0, 19, 0, 12, 0, 5, 0, 1, 1, 0, 5, 0, 10, 0, 14, 0, 18,
+   0, 22, 0, 26, 0, 29, 0, 32, 0, 35, 0, 38, 0, 41, 0, 43, 0, 46, 0, 48,
+   0, 50, 0, 52, 0, 54, 0, 55, 0, 57, 0, 59, 0, 60, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 117, 0, 100, 0, 85, 0, 71, 0, 60, 0, 49, 0, 40,
+   0, 31, 0, 24, 0, 17, 0, 11, 0, 5, 1, 1, 4, 0, 9, 0, 13, 0, 17, 0,
+   21, 0, 24, 0, 27, 0, 30, 0, 33, 0, 36, 0, 38, 0, 41, 0, 43, 0, 45, 0,
+   47, 0, 49, 0, 51, 0, 53, 0, 55, 0, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 117, 0,
+   100, 0, 85, 0, 71, 0, 60, 0, 49, 0, 40, 0, 31, 0, 24, 0, 17, 0, 11, 0,
+   5, 0, 1, 1, 0, 4, 0, 9, 0, 13, 0, 17, 0, 21, 0, 24, 0, 27, 0, 30,
+   0, 33, 0, 36, 0, 38, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 53,
+   0, 55, 0, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 0, 102,
+   0, 87, 0, 75, 0, 63, 0, 53, 0, 44, 0, 36, 0, 28, 0, 22, 0, 15, 0, 10,
+   0, 4, 1, 1, 4, 0, 8, 0, 12, 0, 16, 0, 19, 0, 23, 0, 26, 0, 29, 0,
+   31, 0, 34, 0, 36, 0, 39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0,
+   52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 118, 0, 102, 0, 87, 0, 75, 0, 63, 0, 53, 0,
+   44, 0, 36, 0, 28, 0, 22, 0, 15, 0, 10, 0, 4, 0, 1, 1, 0, 4, 0, 8,
+   0, 12, 0, 16, 0, 19, 0, 23, 0, 26, 0, 29, 0, 31, 0, 34, 0, 36, 0, 39,
+   0, 41, 0, 43, 0, 45, 0, 47, 0, 49, 0, 51, 0, 52, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 119, 0, 103, 0, 89, 0, 77, 0, 67, 0, 57, 0, 48,
+   0, 40, 0, 33, 0, 26, 0, 20, 0, 14, 0, 9, 0, 4, 1, 1, 4, 0, 8, 0,
+   11, 0, 15, 0, 18, 0, 21, 0, 24, 0, 27, 0, 30, 0, 32, 0, 35, 0, 37, 0,
+   39, 0, 41, 0, 43, 0, 45, 0, 47, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0,
+   103, 0, 89, 0, 77, 0, 67, 0, 57, 0, 48, 0, 40, 0, 33, 0, 26, 0, 20, 0,
+   14, 0, 9, 0, 4, 0, 1, 1, 0, 4, 0, 8, 0, 11, 0, 15, 0, 18, 0, 21,
+   0, 24, 0, 27, 0, 30, 0, 32, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45,
+   0, 47, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 105,
+   0, 92, 0, 80, 0, 70, 0, 60, 0, 52, 0, 44, 0, 37, 0, 30, 0, 24, 0, 18,
+   0, 13, 0, 8, 0, 4, 1, 1, 3, 0, 7, 0, 11, 0, 14, 0, 17, 0, 20, 0,
+   23, 0, 26, 0, 28, 0, 31, 0, 33, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0,
+   45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 119, 0, 105, 0, 92, 0, 80, 0, 70, 0, 60, 0,
+   52, 0, 44, 0, 37, 0, 30, 0, 24, 0, 18, 0, 13, 0, 8, 0, 4, 0, 1, 1,
+   0, 3, 0, 7, 0, 11, 0, 14, 0, 17, 0, 20, 0, 23, 0, 26, 0, 28, 0, 31,
+   0, 33, 0, 35, 0, 37, 0, 39, 0, 41, 0, 43, 0, 45, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 120, 0, 106, 0, 93, 0, 82, 0, 72, 0, 63, 0, 55,
+   0, 47, 0, 40, 0, 34, 0, 28, 0, 22, 0, 17, 0, 12, 0, 8, 0, 3, 0, 0,
+   3, 0, 7, 0, 10, 0, 13, 0, 16, 0, 19, 0, 22, 0, 24, 0, 27, 0, 29, 0,
+   31, 0, 34, 0, 36, 0, 37, 0, 39, 0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0,
+   106, 0, 93, 0, 82, 0, 72, 0, 63, 0, 55, 0, 47, 0, 40, 0, 34, 0, 28, 0,
+   22, 0, 17, 0, 12, 0, 8, 0, 3, 0, 0, 0, 0, 3, 0, 7, 0, 10, 0, 13,
+   0, 16, 0, 19, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31, 0, 34, 0, 36, 0, 37,
+   0, 39, 0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 107,
+   0, 95, 0, 85, 0, 75, 0, 66, 0, 58, 0, 51, 0, 44, 0, 37, 0, 31, 0, 26,
+   0, 21, 0, 16, 0, 11, 0, 7, 0, 3, 0, 0, 3, 0, 6, 0, 10, 0, 13, 0,
+   15, 0, 18, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 32, 0, 34, 0, 36, 0,
+   38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 120, 0, 107, 0, 95, 0, 85, 0, 75, 0, 66, 0,
+   58, 0, 51, 0, 44, 0, 37, 0, 31, 0, 26, 0, 21, 0, 16, 0, 11, 0, 7, 0,
+   3, 0, 0, 0, 0, 3, 0, 6, 0, 10, 0, 13, 0, 15, 0, 18, 0, 21, 0, 23,
+   0, 26, 0, 28, 0, 30, 0, 32, 0, 34, 0, 36, 0, 38, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 120, 0, 108, 0, 97, 0, 86, 0, 77, 0, 69, 0, 61,
+   0, 53, 0, 47, 0, 40, 0, 35, 0, 29, 0, 24, 0, 19, 0, 15, 0, 11, 0, 7,
+   0, 3, 0, 0, 3, 0, 6, 0, 9, 0, 12, 0, 15, 0, 17, 0, 20, 0, 22, 0,
+   24, 0, 27, 0, 29, 0, 31, 0, 33, 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 0,
+   108, 0, 97, 0, 86, 0, 77, 0, 69, 0, 61, 0, 53, 0, 47, 0, 40, 0, 35, 0,
+   29, 0, 24, 0, 19, 0, 15, 0, 11, 0, 7, 0, 3, 0, 0, 0, 0, 3, 0, 6,
+   0, 9, 0, 12, 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 27, 0, 29, 0, 31,
+   0, 33, 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 109,
+   0, 98, 0, 88, 0, 79, 0, 71, 0, 63, 0, 56, 0, 50, 0, 43, 0, 38, 0, 32,
+   0, 27, 0, 23, 0, 18, 0, 14, 0, 10, 0, 6, 0, 3, 0, 0, 3, 0, 6, 0,
+   9, 0, 11, 0, 14, 0, 17, 0, 19, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0,
+   31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 109, 0, 98, 0, 88, 0, 79, 0, 71, 0,
+   63, 0, 56, 0, 50, 0, 43, 0, 38, 0, 32, 0, 27, 0, 23, 0, 18, 0, 14, 0,
+   10, 0, 6, 0, 3, 0, 0, 0, 0, 3, 0, 6, 0, 9, 0, 11, 0, 14, 0, 17,
+   0, 19, 0, 21, 0, 23, 0, 26, 0, 28, 0, 30, 0, 31, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 99, 0, 90, 0, 81, 0, 73, 0, 66,
+   0, 59, 0, 52, 0, 46, 0, 41, 0, 35, 0, 30, 0, 26, 0, 21, 0, 17, 0, 13,
+   0, 10, 0, 6, 0, 3, 0, 0, 3, 0, 5, 0, 8, 0, 11, 0, 13, 0, 16, 0,
+   18, 0, 20, 0, 22, 0, 25, 0, 26, 0, 28, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0,
+   110, 0, 99, 0, 90, 0, 81, 0, 73, 0, 66, 0, 59, 0, 52, 0, 46, 0, 41, 0,
+   35, 0, 30, 0, 26, 0, 21, 0, 17, 0, 13, 0, 10, 0, 6, 0, 3, 0, 0, 0,
+   0, 3, 0, 5, 0, 8, 0, 11, 0, 13, 0, 16, 0, 18, 0, 20, 0, 22, 0, 25,
+   0, 26, 0, 28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110,
+   0, 100, 0, 91, 0, 83, 0, 75, 0, 68, 0, 61, 0, 55, 0, 49, 0, 43, 0, 38,
+   0, 33, 0, 29, 0, 24, 0, 20, 0, 16, 0, 13, 0, 9, 0, 6, 0, 3, 0, 0,
+   2, 0, 5, 0, 8, 0, 10, 0, 13, 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0,
+   25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 110, 0, 100, 0, 91, 0, 83, 0, 75, 0,
+   68, 0, 61, 0, 55, 0, 49, 0, 43, 0, 38, 0, 33, 0, 29, 0, 24, 0, 20, 0,
+   16, 0, 13, 0, 9, 0, 6, 0, 3, 0, 0, 0, 0, 2, 0, 5, 0, 8, 0, 10,
+   0, 13, 0, 15, 0, 17, 0, 20, 0, 22, 0, 24, 0, 25, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 121, 0, 111, 0, 102, 0, 93, 0, 85, 0, 77, 0, 70,
+   0, 63, 0, 57, 0, 51, 0, 46, 0, 41, 0, 36, 0, 31, 0, 27, 0, 23, 0, 19,
+   0, 15, 0, 12, 0, 9, 0, 5, 0, 2, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0,
+   12, 0, 15, 0, 17, 0, 19, 0, 21, 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 0,
+   111, 0, 102, 0, 93, 0, 85, 0, 77, 0, 70, 0, 63, 0, 57, 0, 51, 0, 46, 0,
+   41, 0, 36, 0, 31, 0, 27, 0, 23, 0, 19, 0, 15, 0, 12, 0, 9, 0, 5, 0,
+   2, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 15, 0, 17, 0, 19,
+   0, 21, 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112,
+   0, 102, 0, 94, 0, 86, 0, 79, 0, 72, 0, 65, 0, 59, 0, 54, 0, 48, 0, 43,
+   0, 38, 0, 34, 0, 30, 0, 26, 0, 22, 0, 18, 0, 15, 0, 11, 0, 8, 0, 5,
+   0, 2, 0, 0, 2, 0, 5, 0, 7, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0,
+   20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 102, 0, 94, 0, 86, 0, 79, 0,
+   72, 0, 65, 0, 59, 0, 54, 0, 48, 0, 43, 0, 38, 0, 34, 0, 30, 0, 26, 0,
+   22, 0, 18, 0, 15, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 5,
+   0, 7, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 122, 0, 112, 0, 103, 0, 95, 0, 87, 0, 80, 0, 74,
+   0, 67, 0, 61, 0, 56, 0, 51, 0, 46, 0, 41, 0, 36, 0, 32, 0, 28, 0, 24,
+   0, 21, 0, 17, 0, 14, 0, 11, 0, 8, 0, 5, 0, 2, 0, 0, 2, 0, 5, 0,
+   7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0,
+   112, 0, 103, 0, 95, 0, 87, 0, 80, 0, 74, 0, 67, 0, 61, 0, 56, 0, 51, 0,
+   46, 0, 41, 0, 36, 0, 32, 0, 28, 0, 24, 0, 21, 0, 17, 0, 14, 0, 11, 0,
+   8, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13,
+   0, 15, 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113,
+   0, 104, 0, 96, 0, 89, 0, 82, 0, 75, 0, 69, 0, 63, 0, 58, 0, 53, 0, 48,
+   0, 43, 0, 39, 0, 35, 0, 31, 0, 27, 0, 23, 0, 20, 0, 17, 0, 13, 0, 10,
+   0, 7, 0, 5, 0, 2, 0, 0, 2, 0, 4, 0, 7, 0, 9, 0, 11, 0, 13, 0,
+   15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 104, 0, 96, 0, 89, 0, 82, 0,
+   75, 0, 69, 0, 63, 0, 58, 0, 53, 0, 48, 0, 43, 0, 39, 0, 35, 0, 31, 0,
+   27, 0, 23, 0, 20, 0, 17, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0,
+   0, 2, 0, 4, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 122, 0, 113, 0, 105, 0, 97, 0, 90, 0, 83, 0, 77,
+   0, 71, 0, 65, 0, 60, 0, 55, 0, 50, 0, 45, 0, 41, 0, 37, 0, 33, 0, 29,
+   0, 26, 0, 22, 0, 19, 0, 16, 0, 13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0,
+   2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0,
+   113, 0, 105, 0, 97, 0, 90, 0, 83, 0, 77, 0, 71, 0, 65, 0, 60, 0, 55, 0,
+   50, 0, 45, 0, 41, 0, 37, 0, 33, 0, 29, 0, 26, 0, 22, 0, 19, 0, 16, 0,
+   13, 0, 10, 0, 7, 0, 5, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8,
+   0, 10, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 114,
+   0, 106, 0, 98, 0, 91, 0, 85, 0, 78, 0, 72, 0, 67, 0, 62, 0, 57, 0, 52,
+   0, 47, 0, 43, 0, 39, 0, 35, 0, 31, 0, 28, 0, 24, 0, 21, 0, 18, 0, 15,
+   0, 12, 0, 10, 0, 7, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0,
+   10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 122, 0, 114, 0, 106, 0, 98, 0, 91, 0, 85, 0,
+   78, 0, 72, 0, 67, 0, 62, 0, 57, 0, 52, 0, 47, 0, 43, 0, 39, 0, 35, 0,
+   31, 0, 28, 0, 24, 0, 21, 0, 18, 0, 15, 0, 12, 0, 10, 0, 7, 0, 4, 0,
+   2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 123, 0, 114, 0, 106, 0, 99, 0, 92, 0, 86, 0, 80,
+   0, 74, 0, 68, 0, 63, 0, 58, 0, 54, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34,
+   0, 30, 0, 27, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 9, 0, 7, 0, 4,
+   0, 2, 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0,
+   114, 0, 106, 0, 99, 0, 92, 0, 86, 0, 80, 0, 74, 0, 68, 0, 63, 0, 58, 0,
+   54, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 30, 0, 27, 0, 23, 0, 20, 0,
+   17, 0, 15, 0, 12, 0, 9, 0, 7, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4,
+   0, 6, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115,
+   0, 107, 0, 100, 0, 93, 0, 87, 0, 81, 0, 75, 0, 70, 0, 65, 0, 60, 0, 55,
+   0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 32, 0, 29, 0, 26, 0, 22, 0, 20,
+   0, 17, 0, 14, 0, 11, 0, 9, 0, 6, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0,
+   6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 107, 0, 100, 0, 93, 0, 87, 0,
+   81, 0, 75, 0, 70, 0, 65, 0, 60, 0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0,
+   36, 0, 32, 0, 29, 0, 26, 0, 22, 0, 20, 0, 17, 0, 14, 0, 11, 0, 9, 0,
+   6, 0, 4, 0, 2, 0, 0, 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 101, 0, 94, 0, 88, 0, 82,
+   0, 77, 0, 71, 0, 66, 0, 62, 0, 57, 0, 53, 0, 49, 0, 45, 0, 41, 0, 37,
+   0, 34, 0, 31, 0, 28, 0, 25, 0, 22, 0, 19, 0, 16, 0, 13, 0, 11, 0, 8,
+   0, 6, 0, 4, 0, 2, 0, 0, 2, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0,
+   115, 0, 108, 0, 101, 0, 94, 0, 88, 0, 82, 0, 77, 0, 71, 0, 66, 0, 62, 0,
+   57, 0, 53, 0, 49, 0, 45, 0, 41, 0, 37, 0, 34, 0, 31, 0, 28, 0, 25, 0,
+   22, 0, 19, 0, 16, 0, 13, 0, 11, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0,
+   0, 2, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115,
+   0, 108, 0, 102, 0, 95, 0, 89, 0, 83, 0, 78, 0, 73, 0, 68, 0, 63, 0, 59,
+   0, 55, 0, 51, 0, 47, 0, 43, 0, 39, 0, 36, 0, 33, 0, 30, 0, 26, 0, 24,
+   0, 21, 0, 18, 0, 15, 0, 13, 0, 10, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0,
+   1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 123, 0, 115, 0, 108, 0, 102, 0, 95, 0, 89, 0,
+   83, 0, 78, 0, 73, 0, 68, 0, 63, 0, 59, 0, 55, 0, 51, 0, 47, 0, 43, 0,
+   39, 0, 36, 0, 33, 0, 30, 0, 26, 0, 24, 0, 21, 0, 18, 0, 15, 0, 13, 0,
+   10, 0, 8, 0, 6, 0, 4, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 123, 0, 116, 0, 109, 0, 102, 0, 96, 0, 90, 0, 85,
+   0, 79, 0, 74, 0, 69, 0, 65, 0, 60, 0, 56, 0, 52, 0, 48, 0, 45, 0, 41,
+   0, 38, 0, 35, 0, 31, 0, 28, 0, 25, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12,
+   0, 10, 0, 8, 0, 6, 0, 4, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 0,
+   116, 0, 109, 0, 102, 0, 96, 0, 90, 0, 85, 0, 79, 0, 74, 0, 69, 0, 65, 0,
+   60, 0, 56, 0, 52, 0, 48, 0, 45, 0, 41, 0, 38, 0, 35, 0, 31, 0, 28, 0,
+   25, 0, 23, 0, 20, 0, 17, 0, 15, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0,
+   1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+#endif

From caeb3cdf2f6295b1b82ace2ad3ca6f7c9970fe17 Mon Sep 17 00:00:00 2001
From: Lauri Kasanen <cand@gmx.com>
Date: Thu, 18 Aug 2011 10:23:11 +0300
Subject: [PATCH 459/600] pp: Add Jimenez' MLAA

Signed-off-by: Lauri Kasanen <cand@gmx.com>
Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/postprocess/pp_mlaa.c | 304 +++++++++++++++++
 src/gallium/auxiliary/postprocess/pp_mlaa.h | 342 ++++++++++++++++++++
 2 files changed, 646 insertions(+)
 create mode 100644 src/gallium/auxiliary/postprocess/pp_mlaa.c
 create mode 100644 src/gallium/auxiliary/postprocess/pp_mlaa.h

diff --git a/src/gallium/auxiliary/postprocess/pp_mlaa.c b/src/gallium/auxiliary/postprocess/pp_mlaa.c
new file mode 100644
index 00000000000..476502fca93
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_mlaa.c
@@ -0,0 +1,304 @@
+/**
+ * Copyright (C) 2010 Jorge Jimenez (jorge@iryoku.com)
+ * Copyright (C) 2010 Belen Masia (bmasia@unizar.es)
+ * Copyright (C) 2010 Jose I. Echevarria (joseignacioechevarria@gmail.com)
+ * Copyright (C) 2010 Fernando Navarro (fernandn@microsoft.com)
+ * Copyright (C) 2010 Diego Gutierrez (diegog@unizar.es)
+ * Copyright (C) 2011 Lauri Kasanen (cand@gmx.com)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the following statement:
+ *
+ *       "Uses Jimenez's MLAA. Copyright (C) 2010 by Jorge Jimenez, Belen Masia,
+ *        Jose I. Echevarria, Fernando Navarro and Diego Gutierrez."
+ *
+ *       Only for use in the Mesa project, this point 2 is filled by naming the
+ *       technique Jimenez's MLAA in the Mesa config options.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
+ * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are
+ * those of the authors and should not be interpreted as representing official
+ * policies, either expressed or implied, of the copyright holders.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include "postprocess/postprocess.h"
+#include "postprocess/pp_mlaa.h"
+#include "postprocess/pp_filters.h"
+#include "util/u_blit.h"
+#include "util/u_box.h"
+#include "util/u_sampler.h"
+#include "util/u_inlines.h"
+#include "pipe/p_screen.h"
+
+#define IMM_SPACE 80
+
+static float constants[] = { 1, 1, 0, 0 };
+static unsigned int dimensions[2] = { 0, 0 };
+
+static struct pipe_resource *constbuf, *areamaptex;
+
+/** Upload the constants. */
+static void
+up_consts(struct pipe_context *pipe)
+{
+   struct pipe_box box;
+
+   u_box_2d(0, 0, sizeof(constants), 1, &box);
+   pipe->transfer_inline_write(pipe, constbuf, 0, PIPE_TRANSFER_WRITE,
+                               &box, constants, sizeof(constants),
+                               sizeof(constants));
+}
+
+/** Run function of the MLAA filter. */
+static void
+pp_jimenezmlaa_run(struct pp_queue_t *ppq, struct pipe_resource *in,
+                   struct pipe_resource *out, unsigned int n, bool iscolor)
+{
+
+   struct program *p = ppq->p;
+
+   struct pipe_depth_stencil_alpha_state mstencil;
+   struct pipe_sampler_view v_tmp, *arr[3];
+
+   unsigned int w = p->framebuffer.width;
+   unsigned int h = p->framebuffer.height;
+
+   const struct pipe_stencil_ref ref = { {1} };
+   memset(&mstencil, 0, sizeof(mstencil));
+   cso_set_stencil_ref(p->cso, &ref);
+
+   /* Init the pixel size constant */
+   if (dimensions[0] != p->framebuffer.width ||
+       dimensions[1] != p->framebuffer.height) {
+      constants[0] = 1.0 / p->framebuffer.width;
+      constants[1] = 1.0 / p->framebuffer.height;
+
+      up_consts(p->pipe);
+      dimensions[0] = p->framebuffer.width;
+      dimensions[1] = p->framebuffer.height;
+   }
+
+   p->pipe->set_constant_buffer(p->pipe, PIPE_SHADER_VERTEX, 0, constbuf);
+   p->pipe->set_constant_buffer(p->pipe, PIPE_SHADER_FRAGMENT, 0, constbuf);
+
+   mstencil.stencil[0].enabled = 1;
+   mstencil.stencil[0].valuemask = mstencil.stencil[0].writemask = ~0;
+   mstencil.stencil[0].func = PIPE_FUNC_ALWAYS;
+   mstencil.stencil[0].fail_op = PIPE_STENCIL_OP_KEEP;
+   mstencil.stencil[0].zfail_op = PIPE_STENCIL_OP_KEEP;
+   mstencil.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE;
+
+   p->framebuffer.zsbuf = ppq->stencils;
+
+   /* First pass: depth edge detection */
+   if (iscolor)
+      pp_filter_setup_in(p, in);
+   else
+      pp_filter_setup_in(p, ppq->depth);
+
+   pp_filter_setup_out(p, ppq->inner_tmp[0]);
+
+   pp_filter_set_fb(p);
+   pp_filter_misc_state(p);
+   cso_set_depth_stencil_alpha(p->cso, &mstencil);
+   p->pipe->clear(p->pipe, PIPE_CLEAR_STENCIL | PIPE_CLEAR_COLOR,
+                  p->clear_color, 0, 0);
+
+   cso_single_sampler(p->cso, 0, &p->sampler_point);
+   cso_single_sampler_done(p->cso);
+   cso_set_fragment_sampler_views(p->cso, 1, &p->view);
+
+   cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][1]);    /* offsetvs */
+   cso_set_fragment_shader_handle(p->cso, ppq->shaders[n][2]);
+
+   pp_filter_draw(p);
+   pp_filter_end_pass(p);
+
+
+   /* Second pass: blend weights */
+   /* Sampler order: areamap, edgesmap, edgesmapL (reversed, thx compiler) */
+   mstencil.stencil[0].func = PIPE_FUNC_EQUAL;
+   mstencil.stencil[0].zpass_op = PIPE_STENCIL_OP_KEEP;
+   cso_set_depth_stencil_alpha(p->cso, &mstencil);
+
+   pp_filter_setup_in(p, areamaptex);
+   pp_filter_setup_out(p, ppq->inner_tmp[1]);
+
+   u_sampler_view_default_template(&v_tmp, ppq->inner_tmp[0],
+                                   ppq->inner_tmp[0]->format);
+   arr[1] = arr[2] = p->pipe->create_sampler_view(p->pipe,
+                                                  ppq->inner_tmp[0], &v_tmp);
+
+   pp_filter_set_clear_fb(p);
+
+   cso_single_sampler(p->cso, 0, &p->sampler_point);
+   cso_single_sampler(p->cso, 1, &p->sampler_point);
+   cso_single_sampler(p->cso, 2, &p->sampler);
+   cso_single_sampler_done(p->cso);
+
+   arr[0] = p->view;
+   cso_set_fragment_sampler_views(p->cso, 3, arr);
+
+   cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][0]);    /* passvs */
+   cso_set_fragment_shader_handle(p->cso, ppq->shaders[n][3]);
+
+   pp_filter_draw(p);
+   pp_filter_end_pass(p);
+   pipe_sampler_view_reference(&arr[1], NULL);
+
+
+   /* Third pass: smoothed edges */
+   /* Sampler order: colormap, blendmap (wtf compiler) */
+   pp_filter_setup_in(p, ppq->inner_tmp[1]);
+   pp_filter_setup_out(p, out);
+
+   pp_filter_set_fb(p);
+
+   /* Blit the input to the output */
+   util_blit_pixels(p->blitctx, in, 0, 0, 0,
+                    w, h, 0, p->framebuffer.cbufs[0],
+                    0, 0, w, h, 0, PIPE_TEX_MIPFILTER_NEAREST);
+
+   u_sampler_view_default_template(&v_tmp, in, in->format);
+   arr[0] = p->pipe->create_sampler_view(p->pipe, in, &v_tmp);
+
+   cso_single_sampler(p->cso, 0, &p->sampler_point);
+   cso_single_sampler(p->cso, 1, &p->sampler_point);
+   cso_single_sampler_done(p->cso);
+
+   arr[1] = p->view;
+   cso_set_fragment_sampler_views(p->cso, 2, arr);
+
+   cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][1]);    /* offsetvs */
+   cso_set_fragment_shader_handle(p->cso, ppq->shaders[n][4]);
+
+   p->blend.rt[0].blend_enable = 1;
+   cso_set_blend(p->cso, &p->blend);
+
+   pp_filter_draw(p);
+   pp_filter_end_pass(p);
+   pipe_sampler_view_reference(&arr[0], NULL);
+
+   p->blend.rt[0].blend_enable = 0;
+   p->framebuffer.zsbuf = NULL;
+}
+
+/** The init function of the MLAA filter. */
+static void
+pp_jimenezmlaa_init_run(struct pp_queue_t *ppq, unsigned int n,
+                        unsigned int val, bool iscolor)
+{
+
+   struct pipe_box box;
+   struct pipe_resource res;
+
+   char *tmp_text = calloc(sizeof(blend2fs_1) + sizeof(blend2fs_2) +
+                           IMM_SPACE, sizeof(char));
+
+   constbuf = pipe_buffer_create(ppq->p->screen, PIPE_BIND_CONSTANT_BUFFER,
+                                 PIPE_USAGE_STATIC, sizeof(constants));
+   if (!constbuf) {
+      pp_debug("Failed to allocate constant buffer\n");
+      return;
+   }
+
+
+   pp_debug("mlaa: using %u max search steps\n", val);
+
+   if (!tmp_text) {
+      pp_debug("Failed to allocate shader space\n");
+      return;
+   }
+   sprintf(tmp_text, "%s"
+           "IMM FLT32 {    %.8f,     0.0000,     0.0000,     0.0000}\n"
+           "%s\n", blend2fs_1, (float) val, blend2fs_2);
+
+   memset(&res, 0, sizeof(res));
+
+   res.target = PIPE_TEXTURE_2D;
+   res.format = PIPE_FORMAT_R8G8_UNORM;
+   res.width0 = res.height0 = 165;
+   res.bind = PIPE_BIND_SAMPLER_VIEW;
+   res.usage = PIPE_USAGE_STATIC;
+   res.depth0 = res.array_size = res.nr_samples = 1;
+
+   if (!ppq->p->screen->is_format_supported(ppq->p->screen, res.format,
+                                            res.target, 1, res.bind))
+      pp_debug("Areamap format not supported\n");
+
+   areamaptex = ppq->p->screen->resource_create(ppq->p->screen, &res);
+   u_box_2d(0, 0, 165, 165, &box);
+
+   ppq->p->pipe->transfer_inline_write(ppq->p->pipe, areamaptex, 0,
+                                       PIPE_TRANSFER_WRITE, &box,
+                                       areamap, 165 * 2, sizeof(areamap));
+
+
+
+   ppq->shaders[n][1] = pp_tgsi_to_state(ppq->p->pipe, offsetvs, true,
+                                         "offsetvs");
+   if (iscolor)
+      ppq->shaders[n][2] = pp_tgsi_to_state(ppq->p->pipe, color1fs,
+                                            false, "color1fs");
+   else
+      ppq->shaders[n][2] = pp_tgsi_to_state(ppq->p->pipe, depth1fs,
+                                            false, "depth1fs");
+   ppq->shaders[n][3] = pp_tgsi_to_state(ppq->p->pipe, tmp_text, false,
+                                         "blend2fs");
+   ppq->shaders[n][4] = pp_tgsi_to_state(ppq->p->pipe, neigh3fs, false,
+                                         "neigh3fs");
+
+   free(tmp_text);
+}
+
+/** Short wrapper to init the depth version. */
+void
+pp_jimenezmlaa_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val)
+{
+
+   pp_jimenezmlaa_init_run(ppq, n, val, false);
+}
+
+/** Short wrapper to init the color version. */
+void
+pp_jimenezmlaa_init_color(struct pp_queue_t *ppq, unsigned int n,
+                          unsigned int val)
+{
+
+   pp_jimenezmlaa_init_run(ppq, n, val, true);
+}
+
+/** Short wrapper to run the depth version. */
+void
+pp_jimenezmlaa(struct pp_queue_t *ppq, struct pipe_resource *in,
+               struct pipe_resource *out, unsigned int n)
+{
+   pp_jimenezmlaa_run(ppq, in, out, n, false);
+}
+
+/** Short wrapper to run the color version. */
+void
+pp_jimenezmlaa_color(struct pp_queue_t *ppq, struct pipe_resource *in,
+                     struct pipe_resource *out, unsigned int n)
+{
+   pp_jimenezmlaa_run(ppq, in, out, n, true);
+}
diff --git a/src/gallium/auxiliary/postprocess/pp_mlaa.h b/src/gallium/auxiliary/postprocess/pp_mlaa.h
new file mode 100644
index 00000000000..9972d59c6a6
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_mlaa.h
@@ -0,0 +1,342 @@
+/**
+ * Copyright (C) 2010 Jorge Jimenez (jorge@iryoku.com)
+ * Copyright (C) 2010 Belen Masia (bmasia@unizar.es)
+ * Copyright (C) 2010 Jose I. Echevarria (joseignacioechevarria@gmail.com)
+ * Copyright (C) 2010 Fernando Navarro (fernandn@microsoft.com)
+ * Copyright (C) 2010 Diego Gutierrez (diegog@unizar.es)
+ * Copyright (C) 2011 Lauri Kasanen (cand@gmx.com)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the following statement:
+ *
+ *       "Uses Jimenez's MLAA. Copyright (C) 2010 by Jorge Jimenez, Belen Masia,
+ *        Jose I. Echevarria, Fernando Navarro and Diego Gutierrez."
+ *
+ *       Only for use in the Mesa project, this point 2 is filled by naming the
+ *       technique Jimenez's MLAA in the Mesa config options.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
+ * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are
+ * those of the authors and should not be interpreted as representing official
+ * policies, either expressed or implied, of the copyright holders.
+ */
+
+#ifndef PP_MLAA_H
+#define PP_MLAA_H
+
+#include "postprocess/pp_mlaa_areamap.h"
+
+static const char depth1fs[] = "FRAG\n"
+   "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n"
+   "DCL IN[0], GENERIC[0], PERSPECTIVE\n"
+   "DCL IN[1], GENERIC[10], PERSPECTIVE\n"
+   "DCL IN[2], GENERIC[11], PERSPECTIVE\n"
+   "DCL OUT[0], COLOR\n"
+   "DCL SAMP[0]\n"
+   "DCL TEMP[0..2]\n"
+   "IMM FLT32 {    0.0030,     0.0000,     1.0000,     0.0000}\n"
+   "  0: TEX TEMP[0].x, IN[1].xyyy, SAMP[0], 2D\n"
+   "  1: MOV TEMP[1].x, TEMP[0].xxxx\n"
+   "  2: TEX TEMP[0].x, IN[1].zwww, SAMP[0], 2D\n"
+   "  3: MOV TEMP[1].y, TEMP[0].xxxx\n"
+   "  4: TEX TEMP[0].x, IN[2].xyyy, SAMP[0], 2D\n"
+   "  5: MOV TEMP[1].z, TEMP[0].xxxx\n"
+   "  6: TEX TEMP[0].x, IN[2].zwww, SAMP[0], 2D\n"
+   "  7: MOV TEMP[1].w, TEMP[0].xxxx\n"
+   "  8: TEX TEMP[0].x, IN[0].xyyy, SAMP[0], 2D\n"
+   "  9: ADD TEMP[2], TEMP[0].xxxx, -TEMP[1]\n"
+   " 10: ABS TEMP[0], TEMP[2]\n"
+   " 11: SGE TEMP[2], TEMP[0], IMM[0].xxxx\n"
+   " 12: DP4 TEMP[0].x, TEMP[2], IMM[0].zzzz\n"
+   " 13: SEQ TEMP[1].x, TEMP[0].xxxx, IMM[0].yyyy\n"
+   " 14: IF TEMP[1].xxxx :16\n"
+   " 15:   KILP\n"
+   " 16: ENDIF\n"
+   " 17: MOV OUT[0], TEMP[2]\n"
+   " 18: END\n";
+
+
+static const char color1fs[] = "FRAG\n"
+   "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n"
+   "DCL IN[0], GENERIC[0], PERSPECTIVE\n"
+   "DCL IN[1], GENERIC[10], PERSPECTIVE\n"
+   "DCL IN[2], GENERIC[11], PERSPECTIVE\n"
+   "DCL OUT[0], COLOR\n"
+   "DCL SAMP[0]\n"
+   "DCL TEMP[0..2]\n"
+   "IMM FLT32 {    0.2126,     0.7152,     0.0722,     0.1000}\n"
+   "IMM FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}\n"
+   "  0: TEX TEMP[1].xyz, IN[1].xyyy, SAMP[0], 2D\n"
+   "  1: DP3 TEMP[0].x, TEMP[1].xyzz, IMM[0]\n"
+   "  2: TEX TEMP[1].xyz, IN[1].zwww, SAMP[0], 2D\n"
+   "  3: DP3 TEMP[0].y, TEMP[1].xyzz, IMM[0].xyzz\n"
+   "  4: TEX TEMP[1].xyz, IN[2].xyyy, SAMP[0], 2D\n"
+   "  5: DP3 TEMP[0].z, TEMP[1].xyzz, IMM[0].xyzz\n"
+   "  6: TEX TEMP[1].xyz, IN[2].zwww, SAMP[0], 2D\n"
+   "  7: DP3 TEMP[0].w, TEMP[1].xyzz, IMM[0].xyzz\n"
+   "  8: TEX TEMP[1].xyz, IN[0].xyyy, SAMP[0], 2D\n"
+   "  9: DP3 TEMP[2].x, TEMP[1].xyzz, IMM[0].xyzz\n"
+   " 10: ADD TEMP[1], TEMP[2].xxxx, -TEMP[0]\n"
+   " 11: ABS TEMP[0], TEMP[1]\n"
+   " 12: SGE TEMP[2], TEMP[0], IMM[0].wwww\n"
+   " 13: DP4 TEMP[0].x, TEMP[2], IMM[1].xxxx\n"
+   " 14: SEQ TEMP[1].x, TEMP[0].xxxx, IMM[1].yyyy\n"
+   " 15: IF TEMP[1].xxxx :17\n"
+   " 16:   KILP\n"
+   " 17: ENDIF\n"
+   " 18: MOV OUT[0], TEMP[2]\n"
+   " 19: END\n";
+
+
+static const char neigh3fs[] = "FRAG\n"
+   "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n"
+   "DCL IN[0], GENERIC[0], PERSPECTIVE\n"
+   "DCL IN[1], GENERIC[10], PERSPECTIVE\n"
+   "DCL IN[2], GENERIC[11], PERSPECTIVE\n"
+   "DCL OUT[0], COLOR\n"
+   "DCL SAMP[0]\n"
+   "DCL SAMP[1]\n"
+   "DCL TEMP[0..8]\n"
+   "IMM FLT32 {    1.0000,     0.00001,     0.0000,     0.0000}\n"
+   "  0: TEX TEMP[0], IN[0].xyyy, SAMP[1], 2D\n"
+   "  1: MOV TEMP[1].x, TEMP[0].xxxx\n"
+   "  2: TEX TEMP[2].y, IN[2].zwww, SAMP[1], 2D\n"
+   "  3: MOV TEMP[1].y, TEMP[2].yyyy\n"
+   "  4: MOV TEMP[1].z, TEMP[0].zzzz\n"
+   "  5: TEX TEMP[1].w, IN[2].xyyy, SAMP[1], 2D\n"
+   "  6: MUL TEMP[4], TEMP[1], TEMP[1]\n"
+   "  7: MUL TEMP[5], TEMP[4], TEMP[1]\n"
+   "  8: DP4 TEMP[1].x, TEMP[5], IMM[0].xxxx\n"
+   "  9: SLT TEMP[4].x, TEMP[1].xxxx, IMM[0].yyyy\n"
+   " 10: IF TEMP[4].xxxx :12\n"
+   " 11:   KILP\n"
+   " 12: ENDIF\n"
+   " 13: TEX TEMP[4], IN[0].xyyy, SAMP[0], 2D\n"
+   " 14: TEX TEMP[6], IN[1].zwww, SAMP[0], 2D\n"
+   " 15: ADD TEMP[7].x, IMM[0].xxxx, -TEMP[0].xxxx\n"
+   " 16: MUL TEMP[8], TEMP[4], TEMP[7].xxxx\n"
+   " 17: MAD TEMP[7], TEMP[6], TEMP[0].xxxx, TEMP[8]\n"
+   " 18: MUL TEMP[6], TEMP[7], TEMP[5].xxxx\n"
+   " 19: TEX TEMP[7], IN[2].zwww, SAMP[0], 2D\n"
+   " 20: ADD TEMP[8].x, IMM[0].xxxx, -TEMP[2].yyyy\n"
+   " 21: MUL TEMP[3], TEMP[4], TEMP[8].xxxx\n"
+   " 22: MAD TEMP[8], TEMP[7], TEMP[2].yyyy, TEMP[3]\n"
+   " 23: MAD TEMP[2], TEMP[8], TEMP[5].yyyy, TEMP[6]\n"
+   " 24: TEX TEMP[6], IN[1].xyyy, SAMP[0], 2D\n"
+   " 25: ADD TEMP[7].x, IMM[0].xxxx, -TEMP[0].zzzz\n"
+   " 26: MUL TEMP[8], TEMP[4], TEMP[7].xxxx\n"
+   " 27: MAD TEMP[7], TEMP[6], TEMP[0].zzzz, TEMP[8]\n"
+   " 28: MAD TEMP[0], TEMP[7], TEMP[5].zzzz, TEMP[2]\n"
+   " 29: TEX TEMP[2], IN[2].xyyy, SAMP[0], 2D\n"
+   " 30: ADD TEMP[6].x, IMM[0].xxxx, -TEMP[1].wwww\n"
+   " 31: MUL TEMP[7], TEMP[4], TEMP[6].xxxx\n"
+   " 32: MAD TEMP[4], TEMP[2], TEMP[1].wwww, TEMP[7]\n"
+   " 33: MAD TEMP[2], TEMP[4], TEMP[5].wwww, TEMP[0]\n"
+   " 34: RCP TEMP[0].x, TEMP[1].xxxx\n"
+   " 35: MUL OUT[0], TEMP[2], TEMP[0].xxxx\n"
+   " 36: END\n";
+
+
+static const char offsetvs[] = "VERT\n"
+   "DCL IN[0]\n"
+   "DCL IN[1]\n"
+   "DCL OUT[0], POSITION\n"
+   "DCL OUT[1], GENERIC[0]\n"
+   "DCL OUT[2], GENERIC[10]\n"
+   "DCL OUT[3], GENERIC[11]\n"
+   "DCL CONST[0]\n"
+   "IMM FLT32 {    1.0000,     0.0000,    -1.0000,     0.0000}\n"
+   "  0: MOV OUT[0], IN[0]\n"
+   "  1: MOV OUT[1], IN[1]\n"
+   "  2: MAD OUT[2], CONST[0].xyxy, IMM[0].zyyz, IN[1].xyxy\n"
+   "  3: MAD OUT[3], CONST[0].xyxy, IMM[0].xyyx, IN[1].xyxy\n"
+   "  4: END\n";
+
+
+static const char blend2fs_1[] = "FRAG\n"
+   "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1\n"
+   "DCL IN[0], GENERIC[0], PERSPECTIVE\n"
+   "DCL OUT[0], COLOR\n"
+   "DCL SAMP[0]\n"
+   "DCL SAMP[1]\n"
+   "DCL SAMP[2]\n"
+   "DCL CONST[0]\n"
+   "DCL TEMP[0..6]\n"
+   "IMM FLT32 {    0.0000,    -0.2500,     0.00609756,     0.5000}\n"
+   "IMM FLT32 {   -1.5000,    -2.0000,     0.9000,     1.5000}\n"
+   "IMM FLT32 {    2.0000,     1.0000,     4.0000,    33.0000}\n";
+
+static const char blend2fs_2[] =
+   "  0: MOV TEMP[0], IMM[0].xxxx\n"
+   "  1: TEX TEMP[1], IN[0].xyyy, SAMP[1], 2D\n"
+   "  2: MOV TEMP[2].x, TEMP[1]\n"
+   "  3: SNE TEMP[3].x, TEMP[1].yyyy, IMM[0].xxxx\n"
+   "  4: IF TEMP[3].xxxx :76\n"
+   "  5:   MOV TEMP[1].xy, IN[0].xyxx\n"
+   "  6:   MOV TEMP[4].x, IMM[1].xxxx\n"
+   "  7:   BGNLOOP :24\n"
+   "  8:     MUL TEMP[5].x, IMM[1].yyyy, IMM[3].xxxx\n"
+   "  9:     SLE TEMP[6].x, TEMP[4].xxxx, TEMP[5].xxxx\n"
+   " 10:     IF TEMP[6].xxxx :12\n"
+   " 11:       BRK\n"
+   " 12:     ENDIF\n"
+   " 13:     MOV TEMP[4].y, IMM[0].xxxx\n"
+   " 14:     MAD TEMP[3].xyz, CONST[0].xyyy, TEMP[4].xyyy, TEMP[1].xyyy\n"
+   " 15:     MOV TEMP[3].w, IMM[0].xxxx\n"
+   " 16:     TXL TEMP[5], TEMP[3], SAMP[2], 2D\n"
+   " 17:     MOV TEMP[3].x, TEMP[5].yyyy\n"
+   " 18:     SLT TEMP[6].x, TEMP[5].yyyy, IMM[1].zzzz\n"
+   " 19:     IF TEMP[6].xxxx :21\n"
+   " 20:       BRK\n"
+   " 21:     ENDIF\n"
+   " 22:     ADD TEMP[6].x, TEMP[4].xxxx, IMM[1].yyyy\n"
+   " 23:     MOV TEMP[4].x, TEMP[6].xxxx\n"
+   " 24:   ENDLOOP :7\n"
+   " 25:   ADD TEMP[1].x, TEMP[4].xxxx, IMM[1].wwww\n"
+   " 26:   MAD TEMP[6].x, -IMM[2].xxxx, TEMP[3].xxxx, TEMP[1].xxxx\n"
+   " 27:   MUL TEMP[1].x, IMM[1].yyyy, IMM[3].xxxx\n"
+   " 28:   MAX TEMP[4].x, TEMP[6].xxxx, TEMP[1].xxxx\n"
+   " 29:   MOV TEMP[1].x, TEMP[4].xxxx\n"
+   " 30:   MOV TEMP[3].xy, IN[0].xyxx\n"
+   " 31:   MOV TEMP[5].x, IMM[1].wwww\n"
+   " 32:   BGNLOOP :49\n"
+   " 33:     MUL TEMP[6].x, IMM[2].xxxx, IMM[3].xxxx\n"
+   " 34:     SGE TEMP[4].x, TEMP[5].xxxx, TEMP[6].xxxx\n"
+   " 35:     IF TEMP[4].xxxx :37\n"
+   " 36:       BRK\n"
+   " 37:     ENDIF\n"
+   " 38:     MOV TEMP[5].y, IMM[0].xxxx\n"
+   " 39:     MAD TEMP[4].xyz, CONST[0].xyyy, TEMP[5].xyyy, TEMP[3].xyyy\n"
+   " 40:     MOV TEMP[4].w, IMM[0].xxxx\n"
+   " 41:     TXL TEMP[6].xy, TEMP[4], SAMP[2], 2D\n"
+   " 42:     MOV TEMP[4].x, TEMP[6].yyyy\n"
+   " 43:     SLT TEMP[0].x, TEMP[6].yyyy, IMM[1].zzzz\n"
+   " 44:     IF TEMP[0].xxxx :46\n"
+   " 45:       BRK\n"
+   " 46:     ENDIF\n"
+   " 47:     ADD TEMP[6].x, TEMP[5].xxxx, IMM[2].xxxx\n"
+   " 48:     MOV TEMP[5].x, TEMP[6].xxxx\n"
+   " 49:   ENDLOOP :32\n"
+   " 50:   ADD TEMP[3].x, TEMP[5].xxxx, IMM[1].xxxx\n"
+   " 51:   MAD TEMP[5].x, IMM[2].xxxx, TEMP[4].xxxx, TEMP[3].xxxx\n"
+   " 52:   MUL TEMP[3].x, IMM[2].xxxx, IMM[3].xxxx\n"
+   " 53:   MIN TEMP[4].x, TEMP[5].xxxx, TEMP[3].xxxx\n"
+   " 54:   MOV TEMP[3].x, TEMP[1].xxxx\n"
+   " 55:   MOV TEMP[3].y, TEMP[4].xxxx\n"
+   " 56:   MOV TEMP[5].yw, IMM[0].yyyy\n"
+   " 57:   MOV TEMP[5].x, TEMP[1].xxxx\n"
+   " 58:   ADD TEMP[1].x, TEMP[4].xxxx, IMM[2].yyyy\n"
+   " 59:   MOV TEMP[5].z, TEMP[1].xxxx\n"
+   " 60:   MAD TEMP[1], TEMP[5], CONST[0].xyxy, IN[0].xyxy\n"
+   " 61:   MOV TEMP[4], TEMP[1].xyyy\n"
+   " 62:   MOV TEMP[4].w, IMM[0].xxxx\n"
+   " 63:   TXL TEMP[5].x, TEMP[4], SAMP[2], 2D\n"
+   " 64:   MOV TEMP[4].x, TEMP[5].xxxx\n"
+   " 65:   MOV TEMP[5], TEMP[1].zwww\n"
+   " 66:   MOV TEMP[5].w, IMM[0].xxxx\n"
+   " 67:   TXL TEMP[1].x, TEMP[5], SAMP[2], 2D\n"
+   " 68:   MOV TEMP[4].y, TEMP[1].xxxx\n"
+   " 69:   MUL TEMP[5].xy, IMM[2].zzzz, TEMP[4].xyyy\n"
+   " 70:   ROUND TEMP[1].xy, TEMP[5].xyyy\n"
+   " 71:   ABS TEMP[4].xy, TEMP[3].xyyy\n"
+   " 72:   MAD TEMP[3].xy, IMM[2].wwww, TEMP[1].xyyy, TEMP[4].xyyy\n"
+   " 73:   MUL TEMP[5].xyz, TEMP[3].xyyy, IMM[0].zzzz\n"
+   " 74:   MOV TEMP[5].w, IMM[0].xxxx\n"
+   " 75:   TXL TEMP[0].xy, TEMP[5], SAMP[0], 2D\n"
+   " 76: ENDIF\n"
+   " 77: SNE TEMP[1].x, TEMP[2].xxxx, IMM[0].xxxx\n"
+   " 78: IF TEMP[1].xxxx :151\n"
+   " 79:   MOV TEMP[1].xy, IN[0].xyxx\n"
+   " 80:   MOV TEMP[3].x, IMM[1].xxxx\n"
+   " 81:   BGNLOOP :98\n"
+   " 82:     MUL TEMP[4].x, IMM[1].yyyy, IMM[3].xxxx\n"
+   " 83:     SLE TEMP[5].x, TEMP[3].xxxx, TEMP[4].xxxx\n"
+   " 84:     IF TEMP[5].xxxx :86\n"
+   " 85:       BRK\n"
+   " 86:     ENDIF\n"
+   " 87:     MOV TEMP[3].y, IMM[0].xxxx\n"
+   " 88:     MAD TEMP[5].xyz, CONST[0].xyyy, TEMP[3].yxxx, TEMP[1].xyyy\n"
+   " 89:     MOV TEMP[5].w, IMM[0].xxxx\n"
+   " 90:     TXL TEMP[4], TEMP[5], SAMP[2], 2D\n"
+   " 91:     MOV TEMP[2].x, TEMP[4].xxxx\n"
+   " 92:     SLT TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz\n"
+   " 93:     IF TEMP[5].xxxx :95\n"
+   " 94:       BRK\n"
+   " 95:     ENDIF\n"
+   " 96:     ADD TEMP[4].x, TEMP[3].xxxx, IMM[1].yyyy\n"
+   " 97:     MOV TEMP[3].x, TEMP[4].xxxx\n"
+   " 98:   ENDLOOP :81\n"
+   " 99:   ADD TEMP[1].x, TEMP[3].xxxx, IMM[1].wwww\n"
+   "100:   MAD TEMP[6].x, -IMM[2].xxxx, TEMP[2].xxxx, TEMP[1].xxxx\n"
+   "101:   MUL TEMP[1].x, IMM[1].yyyy, IMM[3].xxxx\n"
+   "102:   MAX TEMP[3].x, TEMP[6].xxxx, TEMP[1].xxxx\n"
+   "103:   MOV TEMP[1].x, TEMP[3].xxxx\n"
+   "104:   MOV TEMP[2].xy, IN[0].xyxx\n"
+   "105:   MOV TEMP[4].x, IMM[1].wwww\n"
+   "106:   BGNLOOP :123\n"
+   "107:     MUL TEMP[5].x, IMM[2].xxxx, IMM[3].xxxx\n"
+   "108:     SGE TEMP[6].x, TEMP[4].xxxx, TEMP[5].xxxx\n"
+   "109:     IF TEMP[6].xxxx :111\n"
+   "110:       BRK\n"
+   "111:     ENDIF\n"
+   "112:     MOV TEMP[4].y, IMM[0].xxxx\n"
+   "113:     MAD TEMP[5].xyz, CONST[0].xyyy, TEMP[4].yxxx, TEMP[2].xyyy\n"
+   "114:     MOV TEMP[5].w, IMM[0].xxxx\n"
+   "115:     TXL TEMP[6], TEMP[5], SAMP[2], 2D\n"
+   "116:     MOV TEMP[3].x, TEMP[6].xxxx\n"
+   "117:     SLT TEMP[5].x, TEMP[6].xxxx, IMM[1].zzzz\n"
+   "118:     IF TEMP[5].xxxx :120\n"
+   "119:       BRK\n"
+   "120:     ENDIF\n"
+   "121:     ADD TEMP[6].x, TEMP[4].xxxx, IMM[2].xxxx\n"
+   "122:     MOV TEMP[4].x, TEMP[6].xxxx\n"
+   "123:   ENDLOOP :106\n"
+   "124:   ADD TEMP[2].x, TEMP[4].xxxx, IMM[1].xxxx\n"
+   "125:   MAD TEMP[4].x, IMM[2].xxxx, TEMP[3].xxxx, TEMP[2].xxxx\n"
+   "126:   MUL TEMP[2].x, IMM[2].xxxx, IMM[3].xxxx\n"
+   "127:   MIN TEMP[3].x, TEMP[4].xxxx, TEMP[2].xxxx\n"
+   "128:   MOV TEMP[2].x, TEMP[1].xxxx\n"
+   "129:   MOV TEMP[2].y, TEMP[3].xxxx\n"
+   "130:   MOV TEMP[4].xz, IMM[0].yyyy\n"
+   "131:   MOV TEMP[4].y, TEMP[1].xxxx\n"
+   "132:   ADD TEMP[1].x, TEMP[3].xxxx, IMM[2].yyyy\n"
+   "133:   MOV TEMP[4].w, TEMP[1].xxxx\n"
+   "134:   MAD TEMP[1], TEMP[4], CONST[0].xyxy, IN[0].xyxy\n"
+   "135:   MOV TEMP[3], TEMP[1].xyyy\n"
+   "136:   MOV TEMP[3].w, IMM[0].xxxx\n"
+   "137:   TXL TEMP[4].y, TEMP[3], SAMP[2], 2D\n"
+   "138:   MOV TEMP[3].x, TEMP[4].yyyy\n"
+   "139:   MOV TEMP[4], TEMP[1].zwww\n"
+   "140:   MOV TEMP[4].w, IMM[0].xxxx\n"
+   "141:   TXL TEMP[1].y, TEMP[4], SAMP[2], 2D\n"
+   "142:   MOV TEMP[3].y, TEMP[1].yyyy\n"
+   "143:   MUL TEMP[4].xy, IMM[2].zzzz, TEMP[3].xyyy\n"
+   "144:   ROUND TEMP[1].xy, TEMP[4].xyyy\n"
+   "145:   ABS TEMP[3].xy, TEMP[2].xyyy\n"
+   "146:   MAD TEMP[2].xy, IMM[2].wwww, TEMP[1].xyyy, TEMP[3].xyyy\n"
+   "147:   MUL TEMP[3].xyz, TEMP[2].xyyy, IMM[0].zzzz\n"
+   "148:   MOV TEMP[3].w, IMM[0].xxxx\n"
+   "149:   TXL TEMP[1].xy, TEMP[3], SAMP[0], 2D\n"
+   "150:   MOV TEMP[0].zw, TEMP[1].yyxy\n"
+   "151: ENDIF\n"
+   "152: MOV OUT[0], TEMP[0]\n"
+   "153: END\n";
+
+#endif

From de43cd310ce1d9878efe9d760a781489d7d642e2 Mon Sep 17 00:00:00 2001
From: Lauri Kasanen <cand@gmx.com>
Date: Thu, 18 Aug 2011 10:23:27 +0300
Subject: [PATCH 460/600] pp/main queue: Add pp_run.c

Signed-off-by: Lauri Kasanen <cand@gmx.com>
Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/postprocess/pp_run.c | 188 +++++++++++++++++++++
 1 file changed, 188 insertions(+)
 create mode 100644 src/gallium/auxiliary/postprocess/pp_run.c

diff --git a/src/gallium/auxiliary/postprocess/pp_run.c b/src/gallium/auxiliary/postprocess/pp_run.c
new file mode 100644
index 00000000000..ce671aea360
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_run.c
@@ -0,0 +1,188 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Lauri Kasanen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "postprocess.h"
+
+#include "postprocess/pp_filters.h"
+#include "util/u_blit.h"
+#include "util/u_inlines.h"
+#include "util/u_sampler.h"
+
+/**
+*	Main run function of the PP queue. Called on swapbuffers/flush.
+*
+*	Runs all requested filters in order and handles shuffling the temp
+*	buffers in between.
+*/
+void
+pp_run(struct pp_queue_t *ppq, struct pipe_resource *in,
+       struct pipe_resource *out, struct pipe_resource *indepth)
+{
+
+   unsigned int i;
+
+   if (in->width0 != ppq->p->framebuffer.width ||
+       in->height0 != ppq->p->framebuffer.height) {
+      pp_debug("Resizing the temp pp buffers\n");
+      pp_free_fbos(ppq);
+      pp_init_fbos(ppq, in->width0, in->height0, indepth);
+   }
+
+   if (in == out && ppq->n_filters == 1) {
+      /* Make a copy of in to tmp[0] in this case. */
+      unsigned int w = ppq->p->framebuffer.width;
+      unsigned int h = ppq->p->framebuffer.height;
+
+      util_blit_pixels(ppq->p->blitctx, in, 0, 0, 0,
+                       w, h, 0, ppq->tmps[0],
+                       0, 0, w, h, 0, PIPE_TEX_MIPFILTER_NEAREST);
+
+      in = ppq->tmp[0];
+   }
+
+   switch (ppq->n_filters) {
+   case 1:                     /* No temp buf */
+      ppq->pp_queue[0] (ppq, in, out, 0);
+      break;
+   case 2:                     /* One temp buf */
+
+      ppq->pp_queue[0] (ppq, in, ppq->tmp[0], 0);
+      ppq->pp_queue[1] (ppq, ppq->tmp[0], out, 1);
+
+      break;
+   default:                    /* Two temp bufs */
+      ppq->pp_queue[0] (ppq, in, ppq->tmp[0], 0);
+
+      for (i = 1; i < (ppq->n_filters - 1); i++) {
+         if (i % 2 == 0)
+            ppq->pp_queue[i] (ppq, ppq->tmp[1], ppq->tmp[0], i);
+
+         else
+            ppq->pp_queue[i] (ppq, ppq->tmp[0], ppq->tmp[1], i);
+      }
+
+      if (i % 2 == 0)
+         ppq->pp_queue[i] (ppq, ppq->tmp[1], out, i);
+
+      else
+         ppq->pp_queue[i] (ppq, ppq->tmp[0], out, i);
+
+      break;
+   }
+}
+
+
+/* Utility functions for the filters. You're not forced to use these if */
+/* your filter is more complicated. */
+
+/** Setup this resource as the filter input. */
+void
+pp_filter_setup_in(struct program *p, struct pipe_resource *in)
+{
+   struct pipe_sampler_view v_tmp;
+   u_sampler_view_default_template(&v_tmp, in, in->format);
+   p->view = p->pipe->create_sampler_view(p->pipe, in, &v_tmp);
+}
+
+/** Setup this resource as the filter output. */
+void
+pp_filter_setup_out(struct program *p, struct pipe_resource *out)
+{
+   p->surf.format = out->format;
+   p->surf.usage = PIPE_BIND_RENDER_TARGET;
+
+   p->framebuffer.cbufs[0] = p->pipe->create_surface(p->pipe, out, &p->surf);
+}
+
+/** Clean up the input and output set with the above. */
+void
+pp_filter_end_pass(struct program *p)
+{
+   pipe_surface_reference(&p->framebuffer.cbufs[0], NULL);
+   pipe_sampler_view_reference(&p->view, NULL);
+}
+
+/**
+*	Convert the TGSI assembly to a runnable shader.
+*
+* We need not care about geometry shaders. All we have is screen quads.
+*/
+void *
+pp_tgsi_to_state(struct pipe_context *pipe, const char *text, bool isvs,
+                 const char *name)
+{
+   struct pipe_shader_state state;
+   struct tgsi_token tokens[PP_MAX_TOKENS];
+
+   if (tgsi_text_translate(text, tokens, Elements(tokens)) == FALSE) {
+      pp_debug("Failed to translate %s\n", name);
+      return NULL;
+   }
+
+   state.tokens = tokens;
+
+   if (isvs)
+      return pipe->create_vs_state(pipe, &state);
+   else
+      return pipe->create_fs_state(pipe, &state);
+}
+
+/** Setup misc state for the filter. */
+void
+pp_filter_misc_state(struct program *p)
+{
+   cso_set_blend(p->cso, &p->blend);
+   cso_set_depth_stencil_alpha(p->cso, &p->depthstencil);
+   cso_set_rasterizer(p->cso, &p->rasterizer);
+   cso_set_viewport(p->cso, &p->viewport);
+
+   cso_set_vertex_elements(p->cso, 2, p->velem);
+}
+
+/** Draw with the filter to the set output. */
+void
+pp_filter_draw(struct program *p)
+{
+   util_draw_vertex_buffer(p->pipe, p->cso, p->vbuf, 0,
+                           PIPE_PRIM_QUADS, 4, 2);
+   p->pipe->flush(p->pipe, NULL);
+}
+
+/** Set the framebuffer as active. */
+void
+pp_filter_set_fb(struct program *p)
+{
+   cso_set_framebuffer(p->cso, &p->framebuffer);
+}
+
+/** Set the framebuffer as active and clear it. */
+void
+pp_filter_set_clear_fb(struct program *p)
+{
+   cso_set_framebuffer(p->cso, &p->framebuffer);
+   p->pipe->clear(p->pipe, PIPE_CLEAR_COLOR, p->clear_color, 0, 0);
+}

From 0d383d4790040ecf1f2f73b1d0adaf42657496b7 Mon Sep 17 00:00:00 2001
From: Lauri Kasanen <cand@gmx.com>
Date: Thu, 18 Aug 2011 10:23:41 +0300
Subject: [PATCH 461/600] pp/main queue: Add pp_init.c

Signed-off-by: Lauri Kasanen <cand@gmx.com>
Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/postprocess/pp_init.c | 283 ++++++++++++++++++++
 1 file changed, 283 insertions(+)
 create mode 100644 src/gallium/auxiliary/postprocess/pp_init.c

diff --git a/src/gallium/auxiliary/postprocess/pp_init.c b/src/gallium/auxiliary/postprocess/pp_init.c
new file mode 100644
index 00000000000..75417999b7e
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_init.c
@@ -0,0 +1,283 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Lauri Kasanen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+
+#include "postprocess/filters.h"
+
+#include "pipe/p_screen.h"
+#include "util/u_inlines.h"
+#include "util/u_blit.h"
+#include "util/u_math.h"
+#include "cso_cache/cso_context.h"
+
+/** Initialize the post-processing queue. */
+struct pp_queue_t *
+pp_init(struct pipe_screen *pscreen, const unsigned int *enabled)
+{
+
+   unsigned int curpos = 0, i, tmp_req = 0;
+   struct pp_queue_t *ppq;
+   pp_func *tmp_q;
+
+   pp_debug("Initializing the post-processing queue.\n");
+
+   /* How many filters were requested? */
+   for (i = 0; i < PP_FILTERS; i++) {
+      if (enabled[i])
+         curpos++;
+   }
+   if (!curpos)
+      return NULL;
+
+   ppq = calloc(1, sizeof(struct pp_queue_t));
+   tmp_q = calloc(curpos, sizeof(pp_func));
+   ppq->shaders = calloc(curpos, sizeof(void *));
+   ppq->verts = calloc(curpos, sizeof(unsigned int));
+
+   if (!tmp_q || !ppq || !ppq->shaders || !ppq->verts)
+      goto error;
+
+   ppq->p = pp_init_prog(ppq, pscreen);
+   if (!ppq->p)
+      goto error;
+
+   /* Add the enabled filters to the queue, in order */
+   curpos = 0;
+   ppq->pp_queue = tmp_q;
+   for (i = 0; i < PP_FILTERS; i++) {
+      if (enabled[i]) {
+         ppq->pp_queue[curpos] = pp_filters[i].main;
+         tmp_req = MAX2(tmp_req, pp_filters[i].inner_tmps);
+
+         if (pp_filters[i].shaders) {
+            ppq->shaders[curpos] =
+               calloc(pp_filters[i].shaders + 1, sizeof(void *));
+            ppq->verts[curpos] = pp_filters[i].verts;
+            if (!ppq->shaders[curpos])
+               goto error;
+         }
+         pp_filters[i].init(ppq, curpos, enabled[i]);
+
+         curpos++;
+      }
+   }
+
+   ppq->p->blitctx = util_create_blit(ppq->p->pipe, ppq->p->cso);
+   if (!ppq->p->blitctx)
+      goto error;
+
+   ppq->n_filters = curpos;
+   ppq->n_tmp = (curpos > 2 ? 2 : 1);
+   ppq->n_inner_tmp = tmp_req;
+
+   ppq->fbos_init = false;
+
+   for (i = 0; i < curpos; i++)
+      ppq->shaders[i][0] = ppq->p->passvs;
+
+   pp_debug("Queue successfully allocated. %u filter(s).\n", curpos);
+
+   return ppq;
+
+ error:
+   pp_debug("Error setting up pp\n");
+
+   if (ppq)
+      free(ppq->p);
+   free(ppq);
+   free(tmp_q);
+
+   return NULL;
+}
+
+/** Free any allocated FBOs (temp buffers). Called after resizing for example. */
+void
+pp_free_fbos(struct pp_queue_t *ppq)
+{
+
+   unsigned int i;
+
+   if (!ppq->fbos_init)
+      return;
+
+   for (i = 0; i < ppq->n_tmp; i++) {
+      pipe_surface_reference(&ppq->tmps[i], NULL);
+      pipe_resource_reference(&ppq->tmp[i], NULL);
+   }
+   for (i = 0; i < ppq->n_inner_tmp; i++) {
+      pipe_surface_reference(&ppq->inner_tmps[i], NULL);
+      pipe_resource_reference(&ppq->inner_tmp[i], NULL);
+   }
+   pipe_surface_reference(&ppq->stencils, NULL);
+   pipe_resource_reference(&ppq->stencil, NULL);
+
+   ppq->fbos_init = false;
+}
+
+/** Free the pp queue. Called on context termination. */
+void
+pp_free(struct pp_queue_t *ppq)
+{
+
+   unsigned int i, j;
+
+   pp_free_fbos(ppq);
+
+   util_destroy_blit(ppq->p->blitctx);
+
+   cso_set_fragment_sampler_views(ppq->p->cso, 0, NULL);
+   cso_release_all(ppq->p->cso);
+
+   for (i = 0; i < ppq->n_filters; i++) {
+      for (j = 0; j < PP_MAX_PASSES && ppq->shaders[i][j]; j++) {
+         if (j >= ppq->verts[i]) {
+            ppq->p->pipe->delete_fs_state(ppq->p->pipe, ppq->shaders[i][j]);
+            ppq->shaders[i][j] = NULL;
+         }
+         else if (ppq->shaders[i][j] != ppq->p->passvs) {
+            ppq->p->pipe->delete_vs_state(ppq->p->pipe, ppq->shaders[i][j]);
+            ppq->shaders[i][j] = NULL;
+         }
+      }
+   }
+
+   cso_destroy_context(ppq->p->cso);
+   ppq->p->pipe->destroy(ppq->p->pipe);
+
+   free(ppq->p);
+   free(ppq->pp_queue);
+   free(ppq);
+
+   pp_debug("Queue taken down.\n");
+}
+
+/** Internal debug function. Should be available to final users. */
+void
+pp_debug(const char *fmt, ...)
+{
+   va_list ap;
+
+   if (!getenv("PP_DEBUG"))
+      return;
+
+   va_start(ap, fmt);
+   vfprintf(stderr, fmt, ap);
+   va_end(ap);
+}
+
+/** Allocate the temp FBOs. Called on makecurrent and resize. */
+void
+pp_init_fbos(struct pp_queue_t *ppq, const unsigned int w,
+             const unsigned int h, struct pipe_resource *indepth)
+{
+
+   struct program *p = ppq->p;  /* The lazy will inherit the earth */
+
+   unsigned int i;
+   struct pipe_resource tmp_res;
+
+   if (ppq->fbos_init)
+      return;
+
+   pp_debug("Initializing FBOs, size %ux%u\n", w, h);
+   pp_debug("Requesting %u temps and %u inner temps\n", ppq->n_tmp,
+            ppq->n_inner_tmp);
+
+   memset(&tmp_res, 0, sizeof(tmp_res));
+   tmp_res.target = PIPE_TEXTURE_2D;
+   tmp_res.format = p->surf.format = PIPE_FORMAT_B8G8R8A8_UNORM;
+   tmp_res.width0 = w;
+   tmp_res.height0 = h;
+   tmp_res.depth0 = 1;
+   tmp_res.array_size = 1;
+   tmp_res.last_level = 0;
+   tmp_res.bind = p->surf.usage = PIPE_BIND_RENDER_TARGET;
+
+   if (!p->screen->is_format_supported(p->screen, tmp_res.format,
+                                       tmp_res.target, 1, tmp_res.bind))
+      pp_debug("Temp buffers' format fail\n");
+
+   for (i = 0; i < ppq->n_tmp; i++) {
+      ppq->tmp[i] = p->screen->resource_create(p->screen, &tmp_res);
+      ppq->tmps[i] = p->pipe->create_surface(p->pipe, ppq->tmp[i], &p->surf);
+
+      if (!ppq->tmp[i] || !ppq->tmps[i])
+         goto error;
+   }
+
+   for (i = 0; i < ppq->n_inner_tmp; i++) {
+      ppq->inner_tmp[i] = p->screen->resource_create(p->screen, &tmp_res);
+      ppq->inner_tmps[i] = p->pipe->create_surface(p->pipe,
+                                                   ppq->inner_tmp[i],
+                                                   &p->surf);
+
+      if (!ppq->inner_tmp[i] || !ppq->inner_tmps[i])
+         goto error;
+   }
+
+   tmp_res.format = p->surf.format = indepth->format;
+   tmp_res.bind = p->surf.usage = PIPE_BIND_DEPTH_STENCIL;
+   ppq->depth = indepth;
+   if (!ppq->depth)
+      goto error;
+
+   tmp_res.format = p->surf.format = PIPE_FORMAT_S8_USCALED_Z24_UNORM;
+
+   if (!p->screen->is_format_supported(p->screen, tmp_res.format,
+                                       tmp_res.target, 1, tmp_res.bind)) {
+
+      tmp_res.format = p->surf.format = PIPE_FORMAT_Z24_UNORM_S8_USCALED;
+
+      if (!p->screen->is_format_supported(p->screen, tmp_res.format,
+                                          tmp_res.target, 1, tmp_res.bind))
+         pp_debug("Temp Sbuffer format fail\n");
+   }
+
+   ppq->stencil = p->screen->resource_create(p->screen, &tmp_res);
+   ppq->stencils = p->pipe->create_surface(p->pipe, ppq->stencil, &p->surf);
+   if (!ppq->stencil || !ppq->stencils)
+      goto error;
+
+
+   p->framebuffer.width = w;
+   p->framebuffer.height = h;
+
+   p->viewport.scale[0] = p->viewport.translate[0] = (float) w / 2.0;
+   p->viewport.scale[1] = p->viewport.translate[1] = (float) h / 2.0;
+   p->viewport.scale[3] = 1.0f;
+   p->viewport.translate[3] = 0.0f;
+
+   ppq->fbos_init = true;
+
+   return;
+
+ error:
+   pp_debug("Failed to allocate temp buffers!\n");
+}

From 85d2ee59d9dd71b829f7356e9080103fe0b3251a Mon Sep 17 00:00:00 2001
From: Lauri Kasanen <cand@gmx.com>
Date: Thu, 18 Aug 2011 10:23:55 +0300
Subject: [PATCH 462/600] pp/main queue: Add pp_program.[ch]

Signed-off-by: Lauri Kasanen <cand@gmx.com>
Signed-off-by: Brian Paul <brianp@vmware.com>
---
 .../auxiliary/postprocess/pp_program.c        | 137 ++++++++++++++++++
 .../auxiliary/postprocess/pp_program.h        |  64 ++++++++
 2 files changed, 201 insertions(+)
 create mode 100644 src/gallium/auxiliary/postprocess/pp_program.c
 create mode 100644 src/gallium/auxiliary/postprocess/pp_program.h

diff --git a/src/gallium/auxiliary/postprocess/pp_program.c b/src/gallium/auxiliary/postprocess/pp_program.c
new file mode 100644
index 00000000000..6ec8625dc1b
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_program.c
@@ -0,0 +1,137 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Jakob Bornecrantz
+ * Copyright 2011 Lauri Kasanen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "postprocess/postprocess.h"
+#include "cso_cache/cso_context.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_inlines.h"
+#include "util/u_simple_shaders.h"
+
+/** Initialize the internal details */
+struct program *
+pp_init_prog(struct pp_queue_t *ppq, struct pipe_screen *pscreen)
+{
+
+   struct program *p = calloc(1, sizeof(struct program));
+
+   pp_debug("Initializing program\n");
+   if (!pscreen)
+      return NULL;
+
+   if (!p)
+      return NULL;
+
+   p->screen = pscreen;
+   p->pipe = pscreen->context_create(pscreen, NULL);
+   p->cso = cso_create_context(p->pipe);
+
+   {
+      static const float verts[4][2][4] = {
+         {
+          {1.0f, 1.0f, 0.0f, 1.0f},
+          {1.0f, 1.0f, 0.0f, 1.0f}
+          },
+         {
+          {-1.0f, 1.0f, 0.0f, 1.0f},
+          {0.0f, 1.0f, 0.0f, 1.0f}
+          },
+         {
+          {-1.0f, -1.0f, 0.0f, 1.0f},
+          {0.0f, 0.0f, 0.0f, 1.0f}
+          },
+         {
+          {1.0f, -1.0f, 0.0f, 1.0f},
+          {1.0f, 0.0f, 0.0f, 1.0f}
+          }
+      };
+
+      p->vbuf = pipe_buffer_create(pscreen, PIPE_BIND_VERTEX_BUFFER,
+                                   PIPE_USAGE_STATIC, sizeof(verts));
+      pipe_buffer_write(p->pipe, p->vbuf, 0, sizeof(verts), verts);
+   }
+
+   p->blend.rt[0].colormask = PIPE_MASK_RGBA;
+   p->blend.rt[0].rgb_src_factor = p->blend.rt[0].alpha_src_factor =
+      PIPE_BLENDFACTOR_SRC_ALPHA;
+   p->blend.rt[0].rgb_dst_factor = p->blend.rt[0].alpha_dst_factor =
+      PIPE_BLENDFACTOR_INV_SRC_ALPHA;
+
+   p->rasterizer.cull_face = PIPE_FACE_NONE;
+   p->rasterizer.gl_rasterization_rules = 1;
+
+   p->sampler.wrap_s = p->sampler.wrap_t = p->sampler.wrap_r =
+      PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+
+   p->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+   p->sampler.min_img_filter = p->sampler.mag_img_filter =
+      PIPE_TEX_FILTER_LINEAR;
+   p->sampler.normalized_coords = 1;
+
+   p->sampler_point.wrap_s = p->sampler_point.wrap_t =
+      p->sampler_point.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   p->sampler_point.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+   p->sampler_point.min_img_filter = p->sampler_point.mag_img_filter =
+      PIPE_TEX_FILTER_NEAREST;
+   p->sampler_point.normalized_coords = 1;
+
+   p->velem[0].src_offset = 0;
+   p->velem[0].instance_divisor = 0;
+   p->velem[0].vertex_buffer_index = 0;
+   p->velem[0].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   p->velem[1].src_offset = 1 * 4 * sizeof(float);
+   p->velem[1].instance_divisor = 0;
+   p->velem[1].vertex_buffer_index = 0;
+   p->velem[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+
+   if (!p->screen->is_format_supported(p->screen,
+                                       PIPE_FORMAT_R32G32B32A32_FLOAT,
+                                       PIPE_BUFFER, 1,
+                                       PIPE_BIND_VERTEX_BUFFER))
+      pp_debug("Vertex buf format fail\n");
+
+
+   {
+      const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
+         TGSI_SEMANTIC_GENERIC
+      };
+      const uint semantic_indexes[] = { 0, 0 };
+      p->passvs = util_make_vertex_passthrough_shader(p->pipe, 2,
+                                                      semantic_names,
+                                                      semantic_indexes);
+   }
+
+   p->framebuffer.nr_cbufs = 1;
+
+   p->surf.usage = PIPE_BIND_RENDER_TARGET;
+   p->surf.format = PIPE_FORMAT_B8G8R8A8_UNORM;
+
+   return p;
+}
diff --git a/src/gallium/auxiliary/postprocess/pp_program.h b/src/gallium/auxiliary/postprocess/pp_program.h
new file mode 100644
index 00000000000..2749b35b372
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_program.h
@@ -0,0 +1,64 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Jakob Bornecrantz
+ * Copyright 2011 Lauri Kasanen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef PP_PROGRAM_H
+#define PP_PROGRAM_H
+
+#include "pipe/p_state.h"
+
+/**
+*	Internal control details.
+*/
+struct program
+{
+   struct pipe_screen *screen;
+   struct pipe_context *pipe;
+   struct cso_context *cso;
+
+   struct pipe_blend_state blend;
+   struct pipe_depth_stencil_alpha_state depthstencil;
+   struct pipe_rasterizer_state rasterizer;
+   struct pipe_sampler_state sampler;   /* bilinear */
+   struct pipe_sampler_state sampler_point;     /* point */
+   struct pipe_viewport_state viewport;
+   struct pipe_framebuffer_state framebuffer;
+   struct pipe_vertex_element velem[2];
+
+   float clear_color[4];
+
+   void *passvs;
+
+   struct pipe_resource *vbuf;
+   struct pipe_surface surf;
+   struct pipe_sampler_view *view;
+
+   struct blit_state *blitctx;
+};
+
+
+#endif

From 88bc4eda0f93266e1e5dc40f933872c78d497aa6 Mon Sep 17 00:00:00 2001
From: Lauri Kasanen <cand@gmx.com>
Date: Thu, 18 Aug 2011 10:24:09 +0300
Subject: [PATCH 463/600] pp/main queue: Add the PP headers

Signed-off-by: Lauri Kasanen <cand@gmx.com>
Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/postprocess/filters.h   |  58 ++++++++++
 .../auxiliary/postprocess/postprocess.h       | 100 ++++++++++++++++++
 .../auxiliary/postprocess/pp_filters.h        |  57 ++++++++++
 3 files changed, 215 insertions(+)
 create mode 100644 src/gallium/auxiliary/postprocess/filters.h
 create mode 100644 src/gallium/auxiliary/postprocess/postprocess.h
 create mode 100644 src/gallium/auxiliary/postprocess/pp_filters.h

diff --git a/src/gallium/auxiliary/postprocess/filters.h b/src/gallium/auxiliary/postprocess/filters.h
new file mode 100644
index 00000000000..2454088707d
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/filters.h
@@ -0,0 +1,58 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Lauri Kasanen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef PP_EXTERNAL_FILTERS_H
+#define PP_EXTERNAL_FILTERS_H
+
+#include "postprocess/postprocess.h"
+
+typedef void (*pp_init_func) (struct pp_queue_t *, unsigned int,
+                              unsigned int);
+
+struct pp_filter_t
+{
+   const char *name;            /* Config name */
+   unsigned int inner_tmps;     /* Request how many inner temps */
+   unsigned int shaders;        /* Request how many shaders */
+   unsigned int verts;          /* How many are vertex shaders */
+   pp_init_func init;           /* Init function */
+   pp_func main;                /* Run function */
+};
+
+/*	Order matters. Put new filters in a suitable place. */
+
+static const struct pp_filter_t pp_filters[PP_FILTERS] = {
+/*    name			inner	shaders	verts	init			run */
+   { "pp_noblue",		0,	2,	1,	pp_noblue_init,		pp_nocolor },
+   { "pp_nogreen",		0,	2,	1,	pp_nogreen_init,	pp_nocolor },
+   { "pp_nored",		0,	2,	1,	pp_nored_init,		pp_nocolor },
+   { "pp_celshade",		0,	2,	1,	pp_celshade_init,	pp_nocolor },
+   { "pp_jimenezmlaa",		2,	5,	2,	pp_jimenezmlaa_init,	pp_jimenezmlaa },
+   { "pp_jimenezmlaa_color",	2,	5,	2,	pp_jimenezmlaa_init_color, pp_jimenezmlaa_color },
+};
+
+#endif
diff --git a/src/gallium/auxiliary/postprocess/postprocess.h b/src/gallium/auxiliary/postprocess/postprocess.h
new file mode 100644
index 00000000000..ef94f79997a
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/postprocess.h
@@ -0,0 +1,100 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Lauri Kasanen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef POSTPROCESS_H
+#define POSTPROCESS_H
+
+#include "postprocess/pp_program.h"
+
+#define PP_FILTERS 6            /* Increment this if you add filters */
+#define PP_MAX_PASSES 6
+
+struct pp_queue_t;              /* Forward definition */
+
+/* Less typing later on */
+typedef void (*pp_func) (struct pp_queue_t *, struct pipe_resource *,
+                         struct pipe_resource *, unsigned int);
+/**
+*	The main post-processing queue.
+*/
+struct pp_queue_t
+{
+   pp_func *pp_queue;           /* An array of pp_funcs */
+   unsigned int n_filters;      /* Number of enabled filters */
+
+   struct pipe_resource *tmp[2];        /* Two temp FBOs for the queue */
+   struct pipe_resource *inner_tmp[3];  /* Three for filter use */
+
+   unsigned int n_tmp, n_inner_tmp;
+
+   struct pipe_resource *depth; /* depth of original input */
+   struct pipe_resource *stencil;       /* stencil shared by inner_tmps */
+
+   struct pipe_surface *tmps[2], *inner_tmps[3], *stencils;
+
+   void ***shaders;             /* Shaders in TGSI form */
+   unsigned int *verts;
+   struct program *p;
+
+   bool fbos_init;
+};
+
+/* Main functions */
+
+struct pp_queue_t *pp_init(struct pipe_screen *, const unsigned int *);
+void pp_run(struct pp_queue_t *, struct pipe_resource *,
+            struct pipe_resource *, struct pipe_resource *);
+void pp_free(struct pp_queue_t *);
+void pp_free_fbos(struct pp_queue_t *);
+void pp_debug(const char *, ...);
+struct program *pp_init_prog(struct pp_queue_t *, struct pipe_screen *);
+void pp_init_fbos(struct pp_queue_t *, unsigned int, unsigned int,
+                  struct pipe_resource *);
+
+/* The filters */
+
+void pp_nocolor(struct pp_queue_t *, struct pipe_resource *,
+                struct pipe_resource *, unsigned int);
+
+void pp_jimenezmlaa(struct pp_queue_t *, struct pipe_resource *,
+                    struct pipe_resource *, unsigned int);
+void pp_jimenezmlaa_color(struct pp_queue_t *, struct pipe_resource *,
+                          struct pipe_resource *, unsigned int);
+
+/* The filter init functions */
+
+void pp_celshade_init(struct pp_queue_t *, unsigned int, unsigned int);
+
+void pp_nored_init(struct pp_queue_t *, unsigned int, unsigned int);
+void pp_nogreen_init(struct pp_queue_t *, unsigned int, unsigned int);
+void pp_noblue_init(struct pp_queue_t *, unsigned int, unsigned int);
+
+void pp_jimenezmlaa_init(struct pp_queue_t *, unsigned int, unsigned int);
+void pp_jimenezmlaa_init_color(struct pp_queue_t *, unsigned int,
+                               unsigned int);
+
+#endif
diff --git a/src/gallium/auxiliary/postprocess/pp_filters.h b/src/gallium/auxiliary/postprocess/pp_filters.h
new file mode 100644
index 00000000000..0e34bb6d20f
--- /dev/null
+++ b/src/gallium/auxiliary/postprocess/pp_filters.h
@@ -0,0 +1,57 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Lauri Kasanen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef PP_FILTERS_H
+#define PP_FILTERS_H
+
+/* Internal include, mainly for the filters */
+
+#include "cso_cache/cso_context.h"
+#include "pipe/p_context.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_text.h"
+#include "util/u_memory.h"
+#include "util/u_draw_quad.h"
+
+#define PP_MAX_TOKENS 2048
+
+
+/* Helper functions for the filters */
+
+void pp_filter_setup_in(struct program *, struct pipe_resource *);
+void pp_filter_setup_out(struct program *, struct pipe_resource *);
+void pp_filter_end_pass(struct program *);
+void *pp_tgsi_to_state(struct pipe_context *, const char *, bool,
+                       const char *);
+void pp_filter_misc_state(struct program *);
+void pp_filter_draw(struct program *);
+void pp_filter_set_fb(struct program *);
+void pp_filter_set_clear_fb(struct program *);
+
+
+#endif

From eb5454f20a7ad998f2789d9b2a91adcd41c887b6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 16 Aug 2011 14:18:51 -0700
Subject: [PATCH 464/600] i965/vs: Remove stale comment about compressed
 instructions.

This was copy'n'paste from the fragment shader, and didn't make sense
here.
---
 src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index d5fd21d99a4..70395533119 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -63,7 +63,6 @@ vec4_visitor::reg_allocate_trivial()
       }
    }
 
-   /* Note that compressed instructions require alignment to 2 registers. */
    hw_reg_mapping[0] = this->first_non_payload_grf;
    next = hw_reg_mapping[0] + this->virtual_grf_sizes[0];
    for (i = 1; i < this->virtual_grf_count; i++) {

From 3dadc1e3cceac80a1b63cad2e10f0e0f8904531b Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 16 Aug 2011 15:09:48 -0700
Subject: [PATCH 465/600] i965/vs: Copy the live intervals calculation over
 from the FS.

This is a rather pessimistic calculation, since it doesn't distinguish
individual channels of a vec4, or elements of an array, but should be
a minimum start for register allocation.
---
 src/mesa/drivers/dri/i965/Makefile            |   1 +
 src/mesa/drivers/dri/i965/brw_vec4.cpp        | 130 ++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vec4.h          |   5 +
 .../drivers/dri/i965/brw_vec4_visitor.cpp     |   3 +
 4 files changed, 139 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/brw_vec4.cpp

diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index 45a5350a383..d9c885da65b 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -125,6 +125,7 @@ CXX_SOURCES = \
 	brw_fs_schedule_instructions.cpp \
 	brw_fs_vector_splitting.cpp \
 	brw_shader.cpp \
+	brw_vec4.cpp \
 	brw_vec4_emit.cpp \
 	brw_vec4_reg_allocate.cpp \
 	brw_vec4_visitor.cpp
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
new file mode 100644
index 00000000000..a3ed31a9da0
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -0,0 +1,130 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+extern "C" {
+#include "main/macros.h"
+#include "program/prog_parameter.h"
+}
+
+#define MAX_INSTRUCTION (1 << 30)
+
+namespace brw {
+
+void
+vec4_visitor::calculate_live_intervals()
+{
+   int *def = ralloc_array(mem_ctx, int, virtual_grf_count);
+   int *use = ralloc_array(mem_ctx, int, virtual_grf_count);
+   int loop_depth = 0;
+   int loop_start = 0;
+
+   if (this->live_intervals_valid)
+      return;
+
+   for (int i = 0; i < virtual_grf_count; i++) {
+      def[i] = MAX_INSTRUCTION;
+      use[i] = -1;
+   }
+
+   int ip = 0;
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      if (inst->opcode == BRW_OPCODE_DO) {
+	 if (loop_depth++ == 0)
+	    loop_start = ip;
+      } else if (inst->opcode == BRW_OPCODE_WHILE) {
+	 loop_depth--;
+
+	 if (loop_depth == 0) {
+	    /* Patches up the use of vars marked for being live across
+	     * the whole loop.
+	     */
+	    for (int i = 0; i < virtual_grf_count; i++) {
+	       if (use[i] == loop_start) {
+		  use[i] = ip;
+	       }
+	    }
+	 }
+      } else {
+	 for (unsigned int i = 0; i < 3; i++) {
+	    if (inst->src[i].file == GRF) {
+	       int reg = inst->src[i].reg;
+
+	       if (!loop_depth) {
+		  use[reg] = ip;
+	       } else {
+		  def[reg] = MIN2(loop_start, def[reg]);
+		  use[reg] = loop_start;
+
+		  /* Nobody else is going to go smash our start to
+		   * later in the loop now, because def[reg] now
+		   * points before the bb header.
+		   */
+	       }
+	    }
+	 }
+	 if (inst->dst.file == GRF) {
+	    int reg = inst->dst.reg;
+
+	    if (!loop_depth) {
+	       def[reg] = MIN2(def[reg], ip);
+	    } else {
+	       def[reg] = MIN2(def[reg], loop_start);
+	    }
+	 }
+      }
+
+      ip++;
+   }
+
+   ralloc_free(this->virtual_grf_def);
+   ralloc_free(this->virtual_grf_use);
+   this->virtual_grf_def = def;
+   this->virtual_grf_use = use;
+
+   this->live_intervals_valid = true;
+}
+
+bool
+vec4_visitor::virtual_grf_interferes(int a, int b)
+{
+   int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]);
+   int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]);
+
+   /* We can't handle dead register writes here, without iterating
+    * over the whole instruction stream to find every single dead
+    * write to that register to compare to the live interval of the
+    * other register.  Just assert that dead_code_eliminate() has been
+    * called.
+    */
+   assert((this->virtual_grf_use[a] != -1 ||
+	   this->virtual_grf_def[a] == MAX_INSTRUCTION) &&
+	  (this->virtual_grf_use[b] != -1 ||
+	   this->virtual_grf_def[b] == MAX_INSTRUCTION));
+
+   return start < end;
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 620b05570a6..77a28c7cda7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -320,6 +320,9 @@ public:
    int virtual_grf_count;
    int virtual_grf_array_size;
    int first_non_payload_grf;
+   int *virtual_grf_def;
+   int *virtual_grf_use;
+   bool live_intervals_valid;
 
    dst_reg *variable_storage(ir_variable *var);
 
@@ -377,6 +380,8 @@ public:
    void reg_allocate_trivial();
    void reg_allocate();
    void move_grf_array_access_to_scratch();
+   void calculate_live_intervals();
+   bool virtual_grf_interferes(int a, int b);
 
    vec4_instruction *emit(enum opcode opcode);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index a60fc5f6ada..b3a07bd0539 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2109,9 +2109,12 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
 				       hash_table_pointer_hash,
 				       hash_table_pointer_compare);
 
+   this->virtual_grf_def = NULL;
+   this->virtual_grf_use = NULL;
    this->virtual_grf_sizes = NULL;
    this->virtual_grf_count = 0;
    this->virtual_grf_array_size = 0;
+   this->live_intervals_valid = false;
 
    this->uniforms = 0;
 

From 8174945d3346dc049ae56dcb4bf1eab39f5c88aa Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 17 Aug 2011 10:50:17 -0700
Subject: [PATCH 466/600] i965/vs: Add simple dead code elimination.

This is copied right from the fragment shader.  It is needed for real
register allocation to work correctly.
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp      | 31 +++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vec4.h        |  1 +
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp |  6 ++++
 3 files changed, 38 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index a3ed31a9da0..760bc1f7acd 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -127,4 +127,35 @@ vec4_visitor::virtual_grf_interferes(int a, int b)
    return start < end;
 }
 
+/**
+ * Must be called after calculate_live_intervales() to remove unused
+ * writes to registers -- register allocation will fail otherwise
+ * because something deffed but not used won't be considered to
+ * interfere with other regs.
+ */
+bool
+vec4_visitor::dead_code_eliminate()
+{
+   bool progress = false;
+   int pc = 0;
+
+   calculate_live_intervals();
+
+   foreach_list_safe(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) {
+	 inst->remove();
+	 progress = true;
+      }
+
+      pc++;
+   }
+
+   if (progress)
+      live_intervals_valid = false;
+
+   return progress;
+}
+
 } /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 77a28c7cda7..1db910e2b99 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -381,6 +381,7 @@ public:
    void reg_allocate();
    void move_grf_array_access_to_scratch();
    void calculate_live_intervals();
+   bool dead_code_eliminate();
    bool virtual_grf_interferes(int a, int b);
 
    vec4_instruction *emit(enum opcode opcode);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 011af6f2d3e..65ac7d9dc09 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -558,6 +558,12 @@ vec4_visitor::run()
     */
    move_grf_array_access_to_scratch();
 
+   bool progress;
+   do {
+      progress = false;
+      progress = dead_code_eliminate() || progress;
+   } while (progress);
+
    if (failed)
       return false;
 

From f4db75547f38f08665efac3daf1599fdc5594bb7 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 16 Aug 2011 15:28:53 -0700
Subject: [PATCH 467/600] i965/vs: Implement proper register allocation instead
 of 1:1 mapping.

Fixes vs-atan-* and several others.  This is not the real solution we
eventually want, which will pack floats, vec2s, and vec3s into vec4
registers, but this code should provide the framework for that.
---
 src/mesa/drivers/dri/i965/brw_context.h       |  17 +++
 .../dri/i965/brw_vec4_reg_allocate.cpp        | 139 +++++++++++++++++-
 2 files changed, 155 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index add8c568795..df63fe1d52c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -685,6 +685,23 @@ struct brw_context
 
       uint32_t push_const_offset; /* Offset in the batchbuffer */
       int push_const_size; /* in 256-bit register increments */
+
+      /** @{ register allocator */
+
+      struct ra_regs *regs;
+
+      /**
+       * Array of the ra classes for the unaligned contiguous register
+       * block sizes used.
+       */
+      int *classes;
+
+      /**
+       * Mapping for register-allocated objects in *regs to the first
+       * GRF for that object.
+      */
+      uint8_t *ra_reg_to_grf;
+      /** @} */
    } vs;
 
    struct {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index 70395533119..3f052ff64cf 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -21,6 +21,11 @@
  * IN THE SOFTWARE.
  */
 
+extern "C" {
+#include "main/macros.h"
+#include "program/register_allocate.h"
+} /* extern "C" */
+
 #include "brw_vec4.h"
 #include "../glsl/ir_print_visitor.h"
 
@@ -88,10 +93,142 @@ vec4_visitor::reg_allocate_trivial()
    }
 }
 
+static void
+brw_alloc_reg_set_for_classes(struct brw_context *brw,
+			      int *class_sizes,
+			      int class_count,
+			      int base_reg_count)
+{
+   /* Compute the total number of registers across all classes. */
+   int ra_reg_count = 0;
+   for (int i = 0; i < class_count; i++) {
+      ra_reg_count += base_reg_count - (class_sizes[i] - 1);
+   }
+
+   ralloc_free(brw->vs.ra_reg_to_grf);
+   brw->vs.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
+   ralloc_free(brw->vs.regs);
+   brw->vs.regs = ra_alloc_reg_set(ra_reg_count);
+   ralloc_free(brw->vs.classes);
+   brw->vs.classes = ralloc_array(brw, int, class_count + 1);
+
+   /* Now, add the registers to their classes, and add the conflicts
+    * between them and the base GRF registers (and also each other).
+    */
+   int reg = 0;
+   for (int i = 0; i < class_count; i++) {
+      int class_reg_count = base_reg_count - (class_sizes[i] - 1);
+      brw->vs.classes[i] = ra_alloc_reg_class(brw->vs.regs);
+
+      for (int j = 0; j < class_reg_count; j++) {
+	 ra_class_add_reg(brw->vs.regs, brw->vs.classes[i], reg);
+
+	 brw->vs.ra_reg_to_grf[reg] = j;
+
+	 for (int base_reg = j;
+	      base_reg < j + class_sizes[i];
+	      base_reg++) {
+	    ra_add_transitive_reg_conflict(brw->vs.regs, base_reg, reg);
+	 }
+
+	 reg++;
+      }
+   }
+   assert(reg == ra_reg_count);
+
+   ra_set_finalize(brw->vs.regs);
+}
+
 void
 vec4_visitor::reg_allocate()
 {
-   reg_allocate_trivial();
+   int hw_reg_mapping[virtual_grf_count];
+   int first_assigned_grf = this->first_non_payload_grf;
+   int base_reg_count = BRW_MAX_GRF - first_assigned_grf;
+   int class_sizes[base_reg_count];
+   int class_count = 0;
+
+   /* Using the trivial allocator can be useful in debugging undefined
+    * register access as a result of broken optimization passes.
+    */
+   if (0) {
+      reg_allocate_trivial();
+      return;
+   }
+
+   calculate_live_intervals();
+
+   /* Set up the register classes.
+    *
+    * The base registers store a vec4.  However, we'll need larger
+    * storage for arrays, structures, and matrices, which will be sets
+    * of contiguous registers.
+    */
+   class_sizes[class_count++] = 1;
+
+   for (int r = 0; r < virtual_grf_count; r++) {
+      int i;
+
+      for (i = 0; i < class_count; i++) {
+	 if (class_sizes[i] == this->virtual_grf_sizes[r])
+	    break;
+      }
+      if (i == class_count) {
+	 if (this->virtual_grf_sizes[r] >= base_reg_count) {
+	    fail("Object too large to register allocate.\n");
+	 }
+
+	 class_sizes[class_count++] = this->virtual_grf_sizes[r];
+      }
+   }
+
+   brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, base_reg_count);
+
+   struct ra_graph *g = ra_alloc_interference_graph(brw->vs.regs,
+						    virtual_grf_count);
+
+   for (int i = 0; i < virtual_grf_count; i++) {
+      for (int c = 0; c < class_count; c++) {
+	 if (class_sizes[c] == this->virtual_grf_sizes[i]) {
+	    ra_set_node_class(g, i, brw->vs.classes[c]);
+	    break;
+	 }
+      }
+
+      for (int j = 0; j < i; j++) {
+	 if (virtual_grf_interferes(i, j)) {
+	    ra_add_node_interference(g, i, j);
+	 }
+      }
+   }
+
+   if (!ra_allocate_no_spills(g)) {
+      ralloc_free(g);
+      fail("No register spilling support yet\n");
+   }
+
+   /* Get the chosen virtual registers for each node, and map virtual
+    * regs in the register classes back down to real hardware reg
+    * numbers.
+    */
+   prog_data->total_grf = first_assigned_grf;
+   for (int i = 0; i < virtual_grf_count; i++) {
+      int reg = ra_get_node_reg(g, i);
+
+      hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg];
+      prog_data->total_grf = MAX2(prog_data->total_grf, hw_reg_mapping[i] + 1);
+   }
+
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      assign(hw_reg_mapping, &inst->dst);
+      assign(hw_reg_mapping, &inst->src[0]);
+      assign(hw_reg_mapping, &inst->src[1]);
+      assign(hw_reg_mapping, &inst->src[2]);
+   }
+
+   ralloc_free(g);
 }
 
 } /* namespace brw */

From 64f11db5587dcfb04446dc9521a70ee8ce692b66 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 18 Aug 2011 14:03:26 -0700
Subject: [PATCH 468/600] glsl: Remove unused variable.

---
 src/glsl/lower_if_to_cond_assign.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/glsl/lower_if_to_cond_assign.cpp b/src/glsl/lower_if_to_cond_assign.cpp
index 94b24db1b29..7b89a1539ce 100644
--- a/src/glsl/lower_if_to_cond_assign.cpp
+++ b/src/glsl/lower_if_to_cond_assign.cpp
@@ -169,7 +169,6 @@ ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir)
 
    bool found_control_flow = false;
    ir_assignment *assign;
-   ir_dereference_variable *deref;
 
    /* Check that both blocks don't contain anything we can't support. */
    foreach_iter(exec_list_iterator, then_iter, ir->then_instructions) {

From eb86bb55f5faef67c21604db19210c6788592679 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 18 Aug 2011 14:05:11 -0700
Subject: [PATCH 469/600] i965/fs: Change incorrect use of 'struct fs_reg' to
 simply 'fs_reg'.

It's actually a class.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 2dc9132cec6..33ad12763f7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -510,7 +510,7 @@ fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
 void
 fs_visitor::visit(ir_assignment *ir)
 {
-   struct fs_reg l, r;
+   fs_reg l, r;
    fs_inst *inst;
 
    /* FINISHME: arrays on the lhs */

From 07e9b9049f94ceb443eac1206cc3f9e1e51ac6c1 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 18 Aug 2011 14:08:06 -0700
Subject: [PATCH 470/600] ir_to_mesa: Remove incorrect usage of the 'struct'
 keyword on classes.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/program/ir_to_mesa.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index b1211c1145c..ec3fba182d8 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -581,7 +581,7 @@ ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
    }
 }
 
-struct src_reg
+src_reg
 ir_to_mesa_visitor::src_reg_for_float(float val)
 {
    src_reg src(PROGRAM_CONSTANT, -1, NULL);
@@ -725,7 +725,7 @@ ir_to_mesa_visitor::visit(ir_variable *ir)
 	 }
       }
 
-      struct variable_storage *storage;
+      variable_storage *storage;
       dst_reg dst;
       if (i == ir->num_state_slots) {
 	 /* We'll set the index later. */

From 01d81dedc795005ed235856ce762bb1981655716 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 15 Aug 2011 14:18:16 -0700
Subject: [PATCH 471/600] mesa, glsl_to_tgsi: Add new
 gl_context::NativeIntegers flag.

Previously, native integer support was based on whether the driver
advertised GLSL 1.30 or not.  However, drivers that natively support
integers may wish to do so for older GLSL versions as well.  Adding this
new opt-in flag allows them to do so.

Currently disabled by default on all drivers, which was the existing
behavior (no drivers currently implement GLSL 1.30).

Fixes piglit tests on i965 with INTEL_GLSL_VERSION=130 set:
- spec/glsl-1.10/fs-uniform-int-110.shader_test
- spec/glsl-1.30/fs-uniform-int-130.shader_test
(it was doubly converting the data)

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/main/mtypes.h                     |  6 ++++
 src/mesa/main/uniforms.c                   | 18 +++++-----
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 40 ++++++++++++----------
 3 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 2d5f44c1e7b..8b3650321db 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2717,6 +2717,12 @@ struct gl_constants
 
    GLuint GLSLVersion;  /**< GLSL version supported (ex: 120 = 1.20) */
 
+   /**
+    * Does the driver support real 32-bit integers?  (Otherwise, integers are
+    * simulated via floats.)
+    */
+   GLboolean NativeIntegers;
+
    /** Which texture units support GL_ATI_envmap_bumpmap as targets */
    GLbitfield SupportedBumpUnits;
 
diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
index 1329af4cd7e..cda840fe2d2 100644
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -454,9 +454,9 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
             for (i = 0; i < rows; i++) {
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
-                  params[k++] = ctx->Const.GLSLVersion <= 120 ? 
-                     (GLint) prog->Parameters->ParameterValues[base][j].f : 
-                     prog->Parameters->ParameterValues[base][j].i;
+                  params[k++] = ctx->Const.NativeIntegers ?
+                     prog->Parameters->ParameterValues[base][j].i :
+                     (GLint) prog->Parameters->ParameterValues[base][j].f;
                }
             }
          }
@@ -468,9 +468,9 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
             for (i = 0; i < rows; i++) {
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
-                  params[k++] = ctx->Const.GLSLVersion <= 120 ? 
-                     (GLuint) prog->Parameters->ParameterValues[base][j].f : 
-                     prog->Parameters->ParameterValues[base][j].u;
+                  params[k++] = ctx->Const.NativeIntegers ?
+                     prog->Parameters->ParameterValues[base][j].u :
+                     (GLuint) prog->Parameters->ParameterValues[base][j].f;
                }
             }
          }
@@ -750,7 +750,7 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program,
          if (basicType == GL_INT) {
             const GLint *iValues = ((const GLint *) values) + k * elems;
             for (i = 0; i < elems; i++) {
-               if (ctx->Const.GLSLVersion <= 120)
+               if (!ctx->Const.NativeIntegers)
                   uniformVal[i].f = (GLfloat) iValues[i];
                else
                   uniformVal[i].i = iValues[i];
@@ -759,7 +759,7 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program,
          else if (basicType == GL_UNSIGNED_INT) {
             const GLuint *iValues = ((const GLuint *) values) + k * elems;
             for (i = 0; i < elems; i++) {
-               if (ctx->Const.GLSLVersion <= 120)
+               if (!ctx->Const.NativeIntegers)
                   uniformVal[i].f = (GLfloat)(GLuint) iValues[i];
                else
                   uniformVal[i].u = iValues[i];
@@ -781,7 +781,7 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program,
                else
                   uniformVal[i].b = uniformVal[i].u ? 1 : 0;
                
-               if (ctx->Const.GLSLVersion <= 120)
+               if (!ctx->Const.NativeIntegers)
                   uniformVal[i].f = uniformVal[i].b ? 1.0f : 0.0f;
             }
          }
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 7b90c812595..886a1776210 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -295,6 +295,7 @@ public:
    bool indirect_addr_consts;
    
    int glsl_version;
+   bool native_integers;
 
    variable_storage *find_variable_storage(ir_variable *var);
 
@@ -600,7 +601,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
    
    if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
       type = GLSL_TYPE_FLOAT;
-   else if (glsl_version >= 130)
+   else if (native_integers)
       type = src0.type;
 
 #define case4(c, f, i, u) \
@@ -881,7 +882,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
    st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
    union gl_constant_value uval;
    
-   assert(glsl_version >= 130);
+   assert(native_integers);
 
    uval.i = val;
    src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);
@@ -892,7 +893,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
 struct st_src_reg
 glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
 {
-   if (glsl_version >= 130)
+   if (native_integers)
       return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : 
                                        st_src_reg_for_int(val);
    else
@@ -950,7 +951,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
 {
    st_src_reg src;
 
-   src.type = glsl_version >= 130 ? type->base_type : GLSL_TYPE_FLOAT;
+   src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
    src.file = PROGRAM_TEMPORARY;
    src.index = next_temp;
    src.reladdr = NULL;
@@ -1053,7 +1054,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
          this->next_temp += type_size(ir->type);
 
          dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
-               glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT));
+               native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT));
       }
 
 
@@ -1069,7 +1070,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
             }
          } else {
             st_src_reg src(PROGRAM_STATE_VAR, index,
-                  glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT);
+                  native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT);
             src.swizzle = slots[i].swizzle;
             emit(ir, TGSI_OPCODE_MOV, dst, src);
             /* even a float takes up a whole vec4 reg in a struct/array. */
@@ -1444,7 +1445,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       /* "==" operator producing a scalar boolean. */
       if (ir->operands[0]->type->is_vector() ||
           ir->operands[1]->type->is_vector()) {
-         st_src_reg temp = get_temp(glsl_version >= 130 ? 
+         st_src_reg temp = get_temp(native_integers ?
                glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
                glsl_type::vec4_type);
          assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
@@ -1459,7 +1460,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       /* "!=" operator producing a scalar boolean. */
       if (ir->operands[0]->type->is_vector() ||
           ir->operands[1]->type->is_vector()) {
-         st_src_reg temp = get_temp(glsl_version >= 130 ? 
+         st_src_reg temp = get_temp(native_integers ?
                glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
                glsl_type::vec4_type);
          assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
@@ -1514,7 +1515,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       break;
    case ir_unop_i2f:
    case ir_unop_b2f:
-      if (glsl_version >= 130) {
+      if (native_integers) {
          emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
          break;
       }
@@ -1526,7 +1527,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       result_src = op[0];
       break;
    case ir_unop_f2i:
-      if (glsl_version >= 130)
+      if (native_integers)
          emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
       else
          emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
@@ -1567,7 +1568,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
          break;
       }
    case ir_unop_u2f:
-      if (glsl_version >= 130) {
+      if (native_integers) {
          emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
          break;
       }
@@ -1719,7 +1720,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
    }
 
    this->result = st_src_reg(entry->file, entry->index, var->type);
-   if (glsl_version <= 120)
+   if (!native_integers)
       this->result.type = GLSL_TYPE_FLOAT;
 }
 
@@ -2109,27 +2110,27 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
       }
       break;
    case GLSL_TYPE_UINT:
-      gl_type = glsl_version >= 130 ? GL_UNSIGNED_INT : GL_FLOAT;
+      gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
       for (i = 0; i < ir->type->vector_elements; i++) {
-         if (glsl_version >= 130)
+         if (native_integers)
             values[i].u = ir->value.u[i];
          else
             values[i].f = ir->value.u[i];
       }
       break;
    case GLSL_TYPE_INT:
-      gl_type = glsl_version >= 130 ? GL_INT : GL_FLOAT;
+      gl_type = native_integers ? GL_INT : GL_FLOAT;
       for (i = 0; i < ir->type->vector_elements; i++) {
-         if (glsl_version >= 130)
+         if (native_integers)
             values[i].i = ir->value.i[i];
          else
             values[i].f = ir->value.i[i];
       }
       break;
    case GLSL_TYPE_BOOL:
-      gl_type = glsl_version >= 130 ? GL_BOOL : GL_FLOAT;
+      gl_type = native_integers ? GL_BOOL : GL_FLOAT;
       for (i = 0; i < ir->type->vector_elements; i++) {
-         if (glsl_version >= 130)
+         if (native_integers)
             values[i].b = ir->value.b[i];
          else
             values[i].f = ir->value.b[i];
@@ -3611,6 +3612,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
    v->ctx = original->ctx;
    v->prog = prog;
    v->glsl_version = original->glsl_version;
+   v->native_integers = original->native_integers;
    v->options = original->options;
    v->next_temp = original->next_temp;
    v->num_address_regs = original->num_address_regs;
@@ -3739,6 +3741,7 @@ get_bitmap_visitor(struct st_fragment_program *fp,
    v->ctx = original->ctx;
    v->prog = prog;
    v->glsl_version = original->glsl_version;
+   v->native_integers = original->native_integers;
    v->options = original->options;
    v->next_temp = original->next_temp;
    v->num_address_regs = original->num_address_regs;
@@ -4674,6 +4677,7 @@ get_mesa_program(struct gl_context *ctx,
    v->shader_program = shader_program;
    v->options = options;
    v->glsl_version = ctx->Const.GLSLVersion;
+   v->native_integers = ctx->Const.NativeIntegers;
 
    add_uniforms_to_parameters_list(shader_program, shader, prog);
 

From e98ee06776e0ba055e0194836d5813a0bc7e7795 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 11 Aug 2011 16:42:01 -0700
Subject: [PATCH 472/600] i965/fs: Don't double-convert integer/boolean
 uniforms.

When ctx->Const.NativeIntegers is set, Core Mesa loads integer/boolean
uniforms directly, rather than loading the floating point equivalent.
So, when that's set, we don't need to perform any conversions.

Unfortunately, we can't properly support native integers with the old
vertex shader backend, so this patch leaves them disabled for now.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 36 +++++++++++++++-------------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b19c6e72fa6..c8f74252654 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -279,23 +279,27 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
 
 	 assert(param < ARRAY_SIZE(c->prog_data.param));
 
-	 switch (type->base_type) {
-	 case GLSL_TYPE_FLOAT:
+	 if (ctx->Const.NativeIntegers) {
 	    c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
-	    break;
-	 case GLSL_TYPE_UINT:
-	    c->prog_data.param_convert[param] = PARAM_CONVERT_F2U;
-	    break;
-	 case GLSL_TYPE_INT:
-	    c->prog_data.param_convert[param] = PARAM_CONVERT_F2I;
-	    break;
-	 case GLSL_TYPE_BOOL:
-	    c->prog_data.param_convert[param] = PARAM_CONVERT_F2B;
-	    break;
-	 default:
-	    assert(!"not reached");
-	    c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
-	    break;
+	 } else {
+	    switch (type->base_type) {
+	    case GLSL_TYPE_FLOAT:
+	       c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
+	       break;
+	    case GLSL_TYPE_UINT:
+	       c->prog_data.param_convert[param] = PARAM_CONVERT_F2U;
+	       break;
+	    case GLSL_TYPE_INT:
+	       c->prog_data.param_convert[param] = PARAM_CONVERT_F2I;
+	       break;
+	    case GLSL_TYPE_BOOL:
+	       c->prog_data.param_convert[param] = PARAM_CONVERT_F2B;
+	       break;
+	    default:
+	       assert(!"not reached");
+	       c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
+	       break;
+	    }
 	 }
 	 this->param_index[param] = loc;
 	 this->param_offset[param] = i;

From f7d2dcae3b6bf39b14c1e71f0721d0e4a2833962 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 18 Aug 2011 02:15:56 -0700
Subject: [PATCH 473/600] i965/gen7: Use align1 mode to set URB_WRITE_HWORD
 channel enables.

Makes the new vertex shader backend work on Ivybridge.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i965/brw_eu_emit.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 27e81306e9c..c5013de7ec1 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2244,10 +2244,13 @@ void brw_urb_WRITE(struct brw_compile *p,
 
    if (intel->gen == 7) {
       /* Enable Channel Masks in the URB_WRITE_HWORD message header */
+      brw_push_insn_state(p);
+      brw_set_access_mode(p, BRW_ALIGN_1);
       brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
 		       BRW_REGISTER_TYPE_UD),
 	        retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
 		brw_imm_ud(0xff00));
+      brw_pop_insn_state(p);
    }
 
    insn = next_insn(p, BRW_OPCODE_SEND);

From a40008ac649acf0e7f47294145fe0b14393f03d3 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Fri, 8 Apr 2011 22:50:43 +0800
Subject: [PATCH 474/600] mesa: fix !FEATURE_GL build

Move vbo_exec_FlushVertices_internal out of FEATURE_beginend.

Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Chad Versace <chad@chad-versace.us>
---
 src/mesa/vbo/vbo_exec_api.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
index 2b8d38ef283..af66dbd44d4 100644
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -431,6 +431,24 @@ do {									\
 #include "vbo_attrib_tmp.h"
 
 
+/**
+ * Flush (draw) vertices.
+ * \param  unmap - leave VBO unmapped after flushing?
+ */
+static void
+vbo_exec_FlushVertices_internal(struct vbo_exec_context *exec, GLboolean unmap)
+{
+   if (exec->vtx.vert_count || unmap) {
+      vbo_exec_vtx_flush( exec, unmap );
+   }
+
+   if (exec->vtx.vertex_size) {
+      vbo_exec_copy_to_current( exec );
+      reset_attrfv( exec );
+   }
+}
+
+
 #if FEATURE_beginend
 
 
@@ -534,24 +552,6 @@ static void GLAPIENTRY vbo_exec_EvalPoint2( GLint i, GLint j )
 #endif /* FEATURE_evaluators */
 
 
-/**
- * Flush (draw) vertices.
- * \param  unmap - leave VBO unmapped after flushing?
- */
-static void
-vbo_exec_FlushVertices_internal(struct vbo_exec_context *exec, GLboolean unmap)
-{
-   if (exec->vtx.vert_count || unmap) {
-      vbo_exec_vtx_flush( exec, unmap );
-   }
-
-   if (exec->vtx.vertex_size) {
-      vbo_exec_copy_to_current( exec );
-      reset_attrfv( exec );
-   }
-}
-
-
 /**
  * Called via glBegin.
  */

From 31753b50f3927ab45f8442cbe687bab9cee6d3bc Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Mon, 1 Aug 2011 11:14:18 +0900
Subject: [PATCH 475/600] glsl: remove an unnecessary header include

Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Chad Versace <chad@chad-versace.us>
---
 src/glsl/main.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp
index 9b8a50738ac..01921375070 100644
--- a/src/glsl/main.cpp
+++ b/src/glsl/main.cpp
@@ -24,7 +24,6 @@
 
 #include "ast.h"
 #include "glsl_parser_extras.h"
-#include "glsl_parser.h"
 #include "ir_optimization.h"
 #include "ir_print_visitor.h"
 #include "program.h"

From cd893ccba9b7e4bafbdbbb71f79d1b40bcef01a8 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Fri, 5 Aug 2011 12:50:12 +0900
Subject: [PATCH 476/600] gallium: add PIPE_OS_ANDROID support

Android uses Linux kernel and its own C runtime.  It resembles
PIPE_OS_LINUX a lot with some minor exceptions.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/os/os_thread.h  |  2 +-
 src/gallium/auxiliary/util/u_math.h   | 12 +++++++++++-
 src/gallium/include/pipe/p_compiler.h |  2 ++
 src/gallium/include/pipe/p_config.h   |  8 ++++++++
 4 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h
index 8f1245bff55..d8301298b7f 100644
--- a/src/gallium/auxiliary/os/os_thread.h
+++ b/src/gallium/auxiliary/os/os_thread.h
@@ -314,7 +314,7 @@ typedef int64_t pipe_condvar;
  * pipe_barrier
  */
 
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU)
+#if (defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU)) && !defined(PIPE_OS_ANDROID)
 
 typedef pthread_barrier_t pipe_barrier;
 
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 0b5284428eb..46d9322932a 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -199,6 +199,16 @@ roundf(float x)
 #endif /* _MSC_VER */
 
 
+#ifdef PIPE_OS_ANDROID
+
+static INLINE
+double log2(double d)
+{
+   return log(d) * (1.0 / M_LN2);
+}
+
+#endif
+
 
 
 
@@ -409,7 +419,7 @@ unsigned ffs( unsigned u )
 
    return i;
 }
-#elif defined(__MINGW32__)
+#elif defined(__MINGW32__) || defined(PIPE_OS_ANDROID)
 #define ffs __builtin_ffs
 #endif
 
diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h
index 8c788f4bb0e..05de9ff7cd0 100644
--- a/src/gallium/include/pipe/p_compiler.h
+++ b/src/gallium/include/pipe/p_compiler.h
@@ -67,7 +67,9 @@ extern "C" {
 
 
 #if !defined(__HAIKU__) && !defined(__USE_MISC)
+#if !defined(PIPE_OS_ANDROID)
 typedef unsigned int       uint;
+#endif
 typedef unsigned short     ushort;
 #endif
 typedef unsigned char      ubyte;
diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h
index 8a5d892c884..b3a7b337bc6 100644
--- a/src/gallium/include/pipe/p_config.h
+++ b/src/gallium/include/pipe/p_config.h
@@ -154,6 +154,14 @@
 #define PIPE_OS_UNIX
 #endif
 
+/*
+ * Android defines __linux__ so PIPE_OS_LINUX and PIPE_OS_UNIX will also be
+ * defined.
+ */
+#if defined(ANDROID)
+#define PIPE_OS_ANDROID
+#endif
+
 #if defined(__FreeBSD__)
 #define PIPE_OS_FREEBSD
 #define PIPE_OS_BSD

From b34770d83465f969eae57436d5aa9f4d31278c10 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Fri, 5 Aug 2011 12:52:49 +0900
Subject: [PATCH 477/600] ralloc: include limits.h for SIZE_MAX on Android

Android does not define SIZE_MAX in stdint.h.  We have to include
limits.h for it.

Reviewed-by: Chad Versace <chad@chad-versace.us>
---
 src/glsl/ralloc.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/glsl/ralloc.c b/src/glsl/ralloc.c
index 6a5eac6b9a3..fb48a91c564 100644
--- a/src/glsl/ralloc.c
+++ b/src/glsl/ralloc.c
@@ -28,6 +28,11 @@
 #include <string.h>
 #include <stdint.h>
 
+/* Android defines SIZE_MAX in limits.h, instead of the standard stdint.h */
+#ifdef ANDROID
+#include <limits.h>
+#endif
+
 #include "ralloc.h"
 
 #ifdef __GNUC__

From 504f92c739ffc916084ed821cb9f437276213057 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Fri, 5 Aug 2011 12:54:05 +0900
Subject: [PATCH 478/600] mesa: android has no log2f nor ffs

Define log2f(v) to be logf(v) / M_LN2 and ffs to __builtin_ffs.

Reviewed-by: Chad Versace <chad@chad-versace.us>
---
 src/mesa/main/imports.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h
index 3fa1db02aee..70defdc4327 100644
--- a/src/mesa/main/imports.h
+++ b/src/mesa/main/imports.h
@@ -134,7 +134,13 @@ typedef union { GLfloat f; GLint i; } fi_type;
 #define exp2f(f) ((float) exp2(f))
 #define floorf(f) ((float) floor(f))
 #define logf(f) ((float) log(f))
+
+#ifdef ANDROID
+#define log2f(f) (logf(f) * (float) (1.0 / M_LN2))
+#else
 #define log2f(f) ((float) log2(f))
+#endif
+
 #define powf(x,y) ((float) pow(x,y))
 #define sinf(f) ((float) sin(f))
 #define sinhf(f) ((float) sinh(f))
@@ -562,7 +568,7 @@ _mesa_init_sqrt_table(void);
 
 #ifdef __GNUC__
 
-#ifdef __MINGW32__
+#if defined(__MINGW32__) || defined(ANDROID)
 #define ffs __builtin_ffs
 #define ffsll __builtin_ffsll
 #endif

From b0945c14dff96eb894c4a8b52a4c1374a05e2f6c Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Mon, 11 Oct 2010 16:09:52 +0800
Subject: [PATCH 479/600] egl: add _EGL_PLATFORM_ANDROID

This is Android Gingerbread platform.

Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Chad Versace <chad@chad-versace.us>
---
 include/EGL/eglplatform.h | 9 +++++++++
 src/egl/main/egldisplay.c | 3 ++-
 src/egl/main/egldisplay.h | 1 +
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/include/EGL/eglplatform.h b/include/EGL/eglplatform.h
index fbfdce32ef4..8d3f72f24b3 100644
--- a/include/EGL/eglplatform.h
+++ b/include/EGL/eglplatform.h
@@ -90,6 +90,15 @@ typedef struct gbm_device  *EGLNativeDisplayType;
 typedef struct gbm_bo      *EGLNativePixmapType;
 typedef void               *EGLNativeWindowType;
 
+#elif defined(ANDROID) /* Android */
+
+struct ANativeWindow;
+struct egl_native_pixmap_t;
+
+typedef struct ANativeWindow        *EGLNativeWindowType;
+typedef struct egl_native_pixmap_t  *EGLNativePixmapType;
+typedef void                        *EGLNativeDisplayType;
+
 #elif defined(__unix__) || defined(__unix)
 
 #ifdef MESA_EGL_NO_X11_HEADERS
diff --git a/src/egl/main/egldisplay.c b/src/egl/main/egldisplay.c
index 512ad503f35..1d05e57c429 100644
--- a/src/egl/main/egldisplay.c
+++ b/src/egl/main/egldisplay.c
@@ -72,7 +72,8 @@ static const struct {
    { _EGL_PLATFORM_X11, "x11" },
    { _EGL_PLATFORM_WAYLAND, "wayland" },
    { _EGL_PLATFORM_DRM, "drm" },
-   { _EGL_PLATFORM_FBDEV, "fbdev" }
+   { _EGL_PLATFORM_FBDEV, "fbdev" },
+   { _EGL_PLATFORM_ANDROID, "android" }
 };
 
 
diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h
index 05335ada63c..f0815d028df 100644
--- a/src/egl/main/egldisplay.h
+++ b/src/egl/main/egldisplay.h
@@ -44,6 +44,7 @@ enum _egl_platform_type {
    _EGL_PLATFORM_WAYLAND,
    _EGL_PLATFORM_DRM,
    _EGL_PLATFORM_FBDEV,
+   _EGL_PLATFORM_ANDROID,
 
    _EGL_NUM_PLATFORMS,
    _EGL_INVALID_PLATFORM = -1

From 00b365bc78c131bfd1c19bb22ac55e36f4e8f3c5 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Fri, 1 Oct 2010 15:27:42 -0400
Subject: [PATCH 480/600] egl: add Android-specific extensions

Add EGL_ANDROID_image_native_buffer and EGL_ANDROID_swap_rectangle.
There is no spec for them though.

Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Chad Versace <chad@chad-versace.us>
---
 include/EGL/eglext.h      | 14 ++++++++++++++
 src/egl/main/eglapi.c     | 25 +++++++++++++++++++++++++
 src/egl/main/eglapi.h     |  8 ++++++++
 src/egl/main/egldisplay.h |  3 +++
 src/egl/main/eglmisc.c    |  3 +++
 5 files changed, 53 insertions(+)

diff --git a/include/EGL/eglext.h b/include/EGL/eglext.h
index 9915b8cab3b..0449ae2cd1a 100644
--- a/include/EGL/eglext.h
+++ b/include/EGL/eglext.h
@@ -390,6 +390,20 @@ typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSREGIONNOK) (EGLDisplay dpy, EG
 #define EGL_Y_INVERTED_NOK			0x307F
 #endif /* EGL_NOK_texture_from_pixmap */
 
+#ifndef EGL_ANDROID_image_native_buffer
+#define EGL_ANDROID_image_native_buffer 1
+struct android_native_buffer_t;
+#define EGL_NATIVE_BUFFER_ANDROID       0x3140  /* eglCreateImageKHR target */
+#endif
+
+#ifndef EGL_ANDROID_swap_rectangle
+#define EGL_ANDROID_swap_rectangle 1
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglSetSwapRectangleANDROID (EGLDisplay dpy, EGLSurface draw, EGLint left, EGLint top, EGLint width, EGLint height);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSETSWAPRECTANGLEANDROIDPROC) (EGLDisplay dpy, EGLSurface draw, EGLint left, EGLint top, EGLint width, EGLint height);
+#endif
+
 
 #ifdef __cplusplus
 }
diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c
index 5d186c60e5a..3cb1a5baaf3 100644
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -947,6 +947,9 @@ eglGetProcAddress(const char *procname)
 #ifdef EGL_WL_bind_wayland_display
       { "eglBindWaylandDisplayWL", (_EGLProc) eglBindWaylandDisplayWL },
       { "eglUnbindWaylandDisplayWL", (_EGLProc) eglUnbindWaylandDisplayWL },
+#endif
+#ifdef EGL_ANDROID_swap_rectangle
+      { "eglSetSwapRectangleANDROID", (_EGLProc) eglSetSwapRectangleANDROID },
 #endif
       { NULL, NULL }
    };
@@ -1565,3 +1568,25 @@ eglUnbindWaylandDisplayWL(EGLDisplay dpy, struct wl_display *display)
    RETURN_EGL_EVAL(disp, ret);
 }
 #endif
+
+#ifdef EGL_ANDROID_swap_rectangle
+EGLBoolean EGLAPIENTRY
+eglSetSwapRectangleANDROID(EGLDisplay dpy, EGLSurface draw,
+                           EGLint left, EGLint top,
+                           EGLint width, EGLint height)
+{
+   _EGLDisplay *disp = _eglLockDisplay(dpy);
+   _EGLSurface *surf = _eglLookupSurface(draw, disp);
+   _EGLDriver *drv;
+   EGLBoolean ret;
+
+   _EGL_CHECK_SURFACE(disp, surf, EGL_FALSE, drv);
+
+   if (!disp->Extensions.ANDROID_swap_rectangle)
+      RETURN_EGL_EVAL(disp, EGL_FALSE);
+
+   ret = drv->API.SetSwapRectangleANDROID(drv, disp, surf, left, top, width, height);
+
+   RETURN_EGL_EVAL(disp, ret);
+}
+#endif
diff --git a/src/egl/main/eglapi.h b/src/egl/main/eglapi.h
index 4fcbe40cd4c..1e0aef69dd7 100644
--- a/src/egl/main/eglapi.h
+++ b/src/egl/main/eglapi.h
@@ -131,6 +131,10 @@ typedef EGLBoolean (*BindWaylandDisplayWL_t)(_EGLDriver *drv, _EGLDisplay *disp,
 typedef EGLBoolean (*UnbindWaylandDisplayWL_t)(_EGLDriver *drv, _EGLDisplay *disp, struct wl_display *display);
 #endif
 
+#ifdef EGL_ANDROID_swap_rectangle
+typedef EGLBoolean (*SetSwapRectangleANDROID_t)(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw, EGLint left, EGLint top, EGLint width, EGLint height);
+#endif
+
 /**
  * The API dispatcher jumps through these functions
  */
@@ -210,6 +214,10 @@ struct _egl_api
    BindWaylandDisplayWL_t BindWaylandDisplayWL;
    UnbindWaylandDisplayWL_t UnbindWaylandDisplayWL;
 #endif
+
+#ifdef EGL_ANDROID_swap_rectangle
+   SetSwapRectangleANDROID_t SetSwapRectangleANDROID;
+#endif
 };
 
 #endif /* EGLAPI_INCLUDED */
diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h
index f0815d028df..cddea803c24 100644
--- a/src/egl/main/egldisplay.h
+++ b/src/egl/main/egldisplay.h
@@ -108,6 +108,9 @@ struct _egl_extensions
 
    EGLBoolean NOK_swap_region;
    EGLBoolean NOK_texture_from_pixmap;
+
+   EGLBoolean ANDROID_image_native_buffer;
+   EGLBoolean ANDROID_swap_rectangle;
 };
 
 
diff --git a/src/egl/main/eglmisc.c b/src/egl/main/eglmisc.c
index da189b689a3..ab48bc68218 100644
--- a/src/egl/main/eglmisc.c
+++ b/src/egl/main/eglmisc.c
@@ -113,6 +113,9 @@ _eglUpdateExtensionsString(_EGLDisplay *dpy)
 
    _EGL_CHECK_EXTENSION(NOK_swap_region);
    _EGL_CHECK_EXTENSION(NOK_texture_from_pixmap);
+
+   _EGL_CHECK_EXTENSION(ANDROID_image_native_buffer);
+   _EGL_CHECK_EXTENSION(ANDROID_swap_rectangle);
 #undef _EGL_CHECK_EXTENSION
 }
 

From 8e54c47a6163d437cc44bc1b6ea6e0cfa6d4e774 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Thu, 18 Aug 2011 10:17:10 +0800
Subject: [PATCH 481/600] winsys/android: new SW winsys for Android

On Android, color buffers are passed between server and clients as
opaque buffer_handle_t.  This winsys makes use of gralloc, which
provides a generic way to map and unmap buffer_handle_t for CPU access.
---
 .../winsys/sw/android/android_sw_winsys.cpp   | 271 ++++++++++++++++++
 .../winsys/sw/android/android_sw_winsys.h     |  49 ++++
 2 files changed, 320 insertions(+)
 create mode 100644 src/gallium/winsys/sw/android/android_sw_winsys.cpp
 create mode 100644 src/gallium/winsys/sw/android/android_sw_winsys.h

diff --git a/src/gallium/winsys/sw/android/android_sw_winsys.cpp b/src/gallium/winsys/sw/android/android_sw_winsys.cpp
new file mode 100644
index 00000000000..49d8aa9e19f
--- /dev/null
+++ b/src/gallium/winsys/sw/android/android_sw_winsys.cpp
@@ -0,0 +1,271 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.12
+ *
+ * Copyright (C) 2010-2011 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "state_tracker/sw_winsys.h"
+
+#include <ui/PixelFormat.h>
+#include <private/ui/sw_gralloc_handle.h>
+#include <hardware/gralloc.h>
+
+#include "android_sw_winsys.h"
+
+struct android_sw_winsys
+{
+   struct sw_winsys base;
+
+   const gralloc_module_t *grmod;
+};
+
+struct android_sw_displaytarget
+{
+   buffer_handle_t handle;
+   int stride;
+   int width, height;
+   int usage; /* gralloc usage */
+
+   void *mapped;
+};
+
+static INLINE struct android_sw_winsys *
+android_sw_winsys(struct sw_winsys *ws)
+{
+   return (struct android_sw_winsys *) ws;
+}
+
+static INLINE struct android_sw_displaytarget *
+android_sw_displaytarget(struct sw_displaytarget *dt)
+{
+   return (struct android_sw_displaytarget *) dt;
+}
+
+namespace android {
+
+static void
+android_displaytarget_display(struct sw_winsys *ws,
+                              struct sw_displaytarget *dt,
+                              void *context_private)
+{
+}
+
+static struct sw_displaytarget *
+android_displaytarget_create(struct sw_winsys *ws,
+                             unsigned tex_usage,
+                             enum pipe_format format,
+                             unsigned width, unsigned height,
+                             unsigned alignment,
+                             unsigned *stride)
+{
+   return NULL;
+}
+
+static void
+android_displaytarget_destroy(struct sw_winsys *ws,
+                              struct sw_displaytarget *dt)
+{
+   struct android_sw_displaytarget *adt = android_sw_displaytarget(dt);
+
+   assert(!adt->mapped);
+   FREE(adt);
+}
+
+static void
+android_displaytarget_unmap(struct sw_winsys *ws,
+                            struct sw_displaytarget *dt)
+{
+   struct android_sw_winsys *droid = android_sw_winsys(ws);
+   struct android_sw_displaytarget *adt = android_sw_displaytarget(dt);
+
+   if (adt->mapped) {
+      if (sw_gralloc_handle_t::validate(adt->handle) >= 0) {
+         adt->mapped = NULL;
+      }
+      else {
+         droid->grmod->unlock(droid->grmod, adt->handle);
+         adt->mapped = NULL;
+      }
+   }
+}
+
+static void *
+android_displaytarget_map(struct sw_winsys *ws,
+                          struct sw_displaytarget *dt,
+                          unsigned flags)
+{
+   struct android_sw_winsys *droid = android_sw_winsys(ws);
+   struct android_sw_displaytarget *adt = android_sw_displaytarget(dt);
+
+   if (!adt->mapped) {
+      if (sw_gralloc_handle_t::validate(adt->handle) >= 0) {
+         const sw_gralloc_handle_t *swhandle =
+            reinterpret_cast<const sw_gralloc_handle_t *>(adt->handle);
+         adt->mapped = reinterpret_cast<void *>(swhandle->base);
+      }
+      else {
+         /* lock the buffer for CPU access */
+         droid->grmod->lock(droid->grmod, adt->handle,
+               adt->usage, 0, 0, adt->width, adt->height, &adt->mapped);
+      }
+   }
+
+   return adt->mapped;
+}
+
+static struct sw_displaytarget *
+android_displaytarget_from_handle(struct sw_winsys *ws,
+                                  const struct pipe_resource *templ,
+                                  struct winsys_handle *whandle,
+                                  unsigned *stride)
+{
+   struct android_winsys_handle *ahandle =
+      (struct android_winsys_handle *) whandle;
+   struct android_sw_displaytarget *adt;
+
+   adt = CALLOC_STRUCT(android_sw_displaytarget);
+   if (!adt)
+      return NULL;
+
+   adt->handle = ahandle->handle;
+   adt->stride = ahandle->stride;
+   adt->width = templ->width0;
+   adt->height = templ->height0;
+
+   if (templ->usage & PIPE_BIND_RENDER_TARGET)
+      adt->usage |= GRALLOC_USAGE_HW_RENDER;
+   if (templ->usage & PIPE_BIND_SAMPLER_VIEW)
+      adt->usage |= GRALLOC_USAGE_HW_TEXTURE;
+   if (templ->usage & PIPE_BIND_SCANOUT)
+      adt->usage |= GRALLOC_USAGE_HW_FB;
+
+   if (templ->usage & PIPE_BIND_TRANSFER_READ)
+      adt->usage |= GRALLOC_USAGE_SW_READ_OFTEN;
+   if (templ->usage & PIPE_BIND_TRANSFER_WRITE)
+      adt->usage |= GRALLOC_USAGE_SW_WRITE_OFTEN;
+
+   if (stride)
+      *stride = adt->stride;
+
+   return reinterpret_cast<struct sw_displaytarget *>(adt);
+}
+
+static boolean
+android_displaytarget_get_handle(struct sw_winsys *ws,
+                                 struct sw_displaytarget *dt,
+                                 struct winsys_handle *whandle)
+{
+   return FALSE;
+}
+
+static boolean
+android_is_displaytarget_format_supported(struct sw_winsys *ws,
+                                          unsigned tex_usage,
+                                          enum pipe_format format)
+{
+   struct android_sw_winsys *droid = android_sw_winsys(ws);
+   int fmt;
+
+   switch (format) {
+   case PIPE_FORMAT_R8G8B8A8_UNORM:
+      fmt = PIXEL_FORMAT_RGBA_8888;
+      break;
+   case PIPE_FORMAT_R8G8B8X8_UNORM:
+      fmt = PIXEL_FORMAT_RGBX_8888;
+      break;
+   case PIPE_FORMAT_R8G8B8_UNORM:
+      fmt = PIXEL_FORMAT_RGB_888;
+      break;
+   case PIPE_FORMAT_B5G6R5_UNORM:
+      fmt = PIXEL_FORMAT_RGB_565;
+      break;
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+      fmt = PIXEL_FORMAT_BGRA_8888;
+      break;
+   case PIPE_FORMAT_A8_UNORM:
+      fmt = PIXEL_FORMAT_A_8;
+      break;
+   case PIPE_FORMAT_L8_UNORM:
+      fmt = PIXEL_FORMAT_L_8;
+      break;
+   case PIPE_FORMAT_L8A8_UNORM:
+      fmt = PIXEL_FORMAT_LA_88;
+      break;
+   default:
+      fmt = PIXEL_FORMAT_NONE;
+      break;
+   }
+
+   return (fmt != PIXEL_FORMAT_NONE);
+}
+
+static void
+android_destroy(struct sw_winsys *ws)
+{
+   struct android_sw_winsys *droid = android_sw_winsys(ws);
+
+   FREE(droid);
+}
+
+}; /* namespace android */
+
+using namespace android;
+
+struct sw_winsys *
+android_create_sw_winsys(void)
+{
+   struct android_sw_winsys *droid;
+   const hw_module_t *mod;
+
+   droid = CALLOC_STRUCT(android_sw_winsys);
+   if (!droid)
+      return NULL;
+
+   if (hw_get_module(GRALLOC_HARDWARE_MODULE_ID, &mod)) {
+      FREE(droid);
+      return NULL;
+   }
+
+   droid->grmod = (const gralloc_module_t *) mod;
+
+   droid->base.destroy = android_destroy;
+   droid->base.is_displaytarget_format_supported =
+      android_is_displaytarget_format_supported;
+
+   droid->base.displaytarget_create = android_displaytarget_create;
+   droid->base.displaytarget_destroy = android_displaytarget_destroy;
+   droid->base.displaytarget_from_handle = android_displaytarget_from_handle;
+   droid->base.displaytarget_get_handle = android_displaytarget_get_handle;
+
+   droid->base.displaytarget_map = android_displaytarget_map;
+   droid->base.displaytarget_unmap = android_displaytarget_unmap;
+   droid->base.displaytarget_display = android_displaytarget_display;
+
+   return &droid->base;
+}
diff --git a/src/gallium/winsys/sw/android/android_sw_winsys.h b/src/gallium/winsys/sw/android/android_sw_winsys.h
new file mode 100644
index 00000000000..79392dc0f97
--- /dev/null
+++ b/src/gallium/winsys/sw/android/android_sw_winsys.h
@@ -0,0 +1,49 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.12
+ *
+ * Copyright (C) 2010-2011 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef ANDROID_SW_WINSYS
+#define ANDROID_SW_WINSYS
+
+#include <sys/cdefs.h>
+#include <hardware/gralloc.h>
+
+__BEGIN_DECLS
+
+struct sw_winsys;
+
+struct android_winsys_handle {
+   buffer_handle_t handle;
+   int stride;
+};
+
+struct sw_winsys *
+android_create_sw_winsys(void);
+
+__END_DECLS
+
+#endif /* ANDROID_SW_WINSYS */

From 15418a850575b2a33132165605293ed835b4be53 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Thu, 18 Aug 2011 10:28:29 +0800
Subject: [PATCH 482/600] st/egl: add android backend

Both HW and SW rendering are supported for Android.  For SW rendering,
we use the generic gralloc lock/unlock for mapping and unmapping color
buffers (in winsys/android).

For HW rendering, we need to know the real type of color buffers.  This
backend works with drm_gralloc, where a color buffer is backed by a GEM
object.
---
 .../egl/android/native_android.cpp            | 739 ++++++++++++++++++
 .../state_trackers/egl/common/egl_g3d.c       |  11 +
 .../state_trackers/egl/common/egl_g3d_image.c |  24 +
 .../state_trackers/egl/common/native.h        |   3 +
 .../state_trackers/egl/common/native_buffer.h |   4 +
 5 files changed, 781 insertions(+)
 create mode 100644 src/gallium/state_trackers/egl/android/native_android.cpp

diff --git a/src/gallium/state_trackers/egl/android/native_android.cpp b/src/gallium/state_trackers/egl/android/native_android.cpp
new file mode 100644
index 00000000000..5c4584b2780
--- /dev/null
+++ b/src/gallium/state_trackers/egl/android/native_android.cpp
@@ -0,0 +1,739 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.12
+ *
+ * Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+ * Copyright (C) 2010-2011 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#define LOG_TAG "MESA-EGL"
+#include <cutils/log.h>
+#include <cutils/properties.h>
+#include <ui/PixelFormat.h>
+#include <ui/android_native_buffer.h>
+
+extern "C" {
+#include "egllog.h"
+}
+
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "common/native.h"
+#include "common/native_helper.h"
+#include "android/android_sw_winsys.h"
+#include "state_tracker/drm_driver.h"
+
+struct android_config;
+
+struct android_display {
+   struct native_display base;
+
+   boolean use_drm;
+   const struct native_event_handler *event_handler;
+   struct android_config *configs;
+   int num_configs;
+};
+
+struct android_surface {
+   struct native_surface base;
+
+   struct android_display *adpy;
+   android_native_window_t *win;
+
+   uint stamp;
+   android_native_buffer_t *buf;
+   struct pipe_resource *res;
+
+   /* cache the current front and back resources */
+   void *cache_handles[2];
+   struct pipe_resource *cache_resources[2];
+};
+
+struct android_config {
+   struct native_config base;
+};
+
+static INLINE struct android_display *
+android_display(const struct native_display *ndpy)
+{
+   return (struct android_display *) ndpy;
+}
+
+static INLINE struct android_surface *
+android_surface(const struct native_surface *nsurf)
+{
+   return (struct android_surface *) nsurf;
+}
+
+static INLINE struct android_config *
+android_config(const struct native_config *nconf)
+{
+   return (struct android_config *) nconf;
+}
+
+namespace android {
+
+static enum pipe_format
+get_pipe_format(int native)
+{
+   enum pipe_format fmt;
+
+   /* see libpixelflinger/format.cpp */
+   switch (native) {
+   case PIXEL_FORMAT_RGBA_8888:
+      fmt = PIPE_FORMAT_R8G8B8A8_UNORM;
+      break;
+   case PIXEL_FORMAT_RGBX_8888:
+      fmt = PIPE_FORMAT_R8G8B8X8_UNORM;
+      break;
+   case PIXEL_FORMAT_RGB_888:
+      fmt = PIPE_FORMAT_R8G8B8_UNORM;
+      break;
+   case PIXEL_FORMAT_RGB_565:
+      fmt = PIPE_FORMAT_B5G6R5_UNORM;
+      break;
+   case PIXEL_FORMAT_BGRA_8888:
+      fmt = PIPE_FORMAT_B8G8R8A8_UNORM;
+      break;
+   case PIXEL_FORMAT_A_8:
+      fmt = PIPE_FORMAT_A8_UNORM;
+      break;
+   case PIXEL_FORMAT_L_8:
+      fmt = PIPE_FORMAT_L8_UNORM;
+      break;
+   case PIXEL_FORMAT_LA_88:
+      fmt = PIPE_FORMAT_L8A8_UNORM;
+      break;
+   case PIXEL_FORMAT_NONE:
+   case PIXEL_FORMAT_RGBA_5551:
+   case PIXEL_FORMAT_RGBA_4444:
+   case PIXEL_FORMAT_RGB_332:
+   default:
+      LOGE("unsupported native format 0x%x", native);
+      fmt = PIPE_FORMAT_NONE;
+      break;
+   }
+
+   return fmt;
+}
+
+#include <gralloc_drm_handle.h>
+static int
+get_handle_name(buffer_handle_t handle)
+{
+   struct gralloc_drm_handle_t *dh;
+
+   /* check that the buffer is allocated by drm_gralloc and cast */
+   dh = gralloc_drm_handle(handle);
+
+   return (dh) ? dh->name : 0;
+}
+
+/**
+ * Import an android_native_buffer_t allocated by the server.
+ */
+static struct pipe_resource *
+import_buffer(struct android_display *adpy, const struct pipe_resource *templ,
+              struct android_native_buffer_t *abuf)
+{
+   struct pipe_screen *screen = adpy->base.screen;
+   struct pipe_resource *res;
+
+   if (templ->bind & PIPE_BIND_RENDER_TARGET) {
+      if (!screen->is_format_supported(screen, templ->format,
+               templ->target, 0, PIPE_BIND_RENDER_TARGET))
+         LOGW("importing unsupported buffer as render target");
+   }
+   if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
+      if (!screen->is_format_supported(screen, templ->format,
+               templ->target, 0, PIPE_BIND_SAMPLER_VIEW))
+         LOGW("importing unsupported buffer as sampler view");
+   }
+
+   if (adpy->use_drm) {
+      struct winsys_handle handle;
+
+      memset(&handle, 0, sizeof(handle));
+      handle.type = DRM_API_HANDLE_TYPE_SHARED;
+      /* for DRM, we need the GEM name */
+      handle.handle = get_handle_name(abuf->handle);
+      if (!handle.handle) {
+         LOGE("unable to import invalid buffer %p", abuf);
+         return NULL;
+      }
+
+      handle.stride =
+         abuf->stride * util_format_get_blocksize(templ->format);
+
+      res = screen->resource_from_handle(screen, templ, &handle);
+   }
+   else {
+      struct android_winsys_handle handle;
+
+      memset(&handle, 0, sizeof(handle));
+      handle.handle = abuf->handle;
+      handle.stride =
+         abuf->stride * util_format_get_blocksize(templ->format);
+
+      res = screen->resource_from_handle(screen,
+            templ, (struct winsys_handle *) &handle);
+   }
+
+   if (!res)
+      LOGE("failed to import buffer %p", abuf);
+
+   return res;
+}
+
+/**
+ * Dequeue the next back buffer for rendering.
+ */
+static boolean
+android_surface_dequeue_buffer(struct native_surface *nsurf)
+{
+   struct android_surface *asurf = android_surface(nsurf);
+   void *handle;
+   int idx;
+
+   if (asurf->win->dequeueBuffer(asurf->win, &asurf->buf) != NO_ERROR) {
+      LOGE("failed to dequeue window %p", asurf->win);
+      return FALSE;
+   }
+
+   asurf->buf->common.incRef(&asurf->buf->common);
+   asurf->win->lockBuffer(asurf->win, asurf->buf);
+
+   if (asurf->adpy->use_drm)
+      handle = (void *) get_handle_name(asurf->buf->handle);
+   else
+      handle = (void *) asurf->buf->handle;
+   /* NULL is invalid */
+   if (!handle) {
+      LOGE("window %p returned an invalid buffer", asurf->win);
+      return TRUE;
+   }
+
+   /* find the slot to use */
+   for (idx = 0; idx < Elements(asurf->cache_handles); idx++) {
+      if (asurf->cache_handles[idx] == handle || !asurf->cache_handles[idx])
+         break;
+   }
+   if (idx == Elements(asurf->cache_handles)) {
+      /* buffer reallocated; clear the cache */
+      for (idx = 0; idx < Elements(asurf->cache_handles); idx++) {
+         asurf->cache_handles[idx] = 0;
+         pipe_resource_reference(&asurf->cache_resources[idx], NULL);
+      }
+      idx = 0;
+   }
+
+   /* update the cache */
+   if (!asurf->cache_handles[idx]) {
+      struct pipe_resource templ;
+
+      assert(!asurf->cache_resources[idx]);
+
+      memset(&templ, 0, sizeof(templ));
+      templ.target = PIPE_TEXTURE_2D;
+      templ.last_level = 0;
+      templ.width0 = asurf->buf->width;
+      templ.height0 = asurf->buf->height;
+      templ.depth0 = 1;
+      templ.bind = PIPE_BIND_RENDER_TARGET;
+      if (!asurf->adpy->use_drm) {
+         templ.bind |= PIPE_BIND_TRANSFER_WRITE |
+                       PIPE_BIND_TRANSFER_READ;
+      }
+
+      templ.format = get_pipe_format(asurf->buf->format);
+      if (templ.format != PIPE_FORMAT_NONE) {
+         asurf->cache_resources[idx] =
+            import_buffer(asurf->adpy, &templ, asurf->buf);
+      }
+      else {
+         asurf->cache_resources[idx] = NULL;
+      }
+
+      asurf->cache_handles[idx] = handle;
+   }
+
+   pipe_resource_reference(&asurf->res, asurf->cache_resources[idx]);
+
+   return TRUE;
+}
+
+/**
+ * Enqueue the back buffer.  This will make it the next front buffer.
+ */
+static boolean
+android_surface_enqueue_buffer(struct native_surface *nsurf)
+{
+   struct android_surface *asurf = android_surface(nsurf);
+
+   pipe_resource_reference(&asurf->res, NULL);
+
+   asurf->win->queueBuffer(asurf->win, asurf->buf);
+
+   asurf->buf->common.decRef(&asurf->buf->common);
+   asurf->buf = NULL;
+
+   return TRUE;
+}
+
+static boolean
+android_surface_swap_buffers(struct native_surface *nsurf)
+{
+   struct android_surface *asurf = android_surface(nsurf);
+   struct android_display *adpy = asurf->adpy;
+
+   if (!asurf->buf)
+      return FALSE;
+
+   android_surface_enqueue_buffer(&asurf->base);
+
+   asurf->stamp++;
+   adpy->event_handler->invalid_surface(&adpy->base,
+         &asurf->base, asurf->stamp);
+
+   return TRUE;
+}
+
+static boolean
+android_surface_present(struct native_surface *nsurf,
+                        enum native_attachment natt,
+                        boolean preserve,
+                        uint swap_interval)
+{
+   boolean ret;
+
+   if (swap_interval || natt != NATIVE_ATTACHMENT_BACK_LEFT)
+      return FALSE;
+
+   return android_surface_swap_buffers(nsurf);
+}
+
+static boolean
+android_surface_validate(struct native_surface *nsurf, uint attachment_mask,
+                         unsigned int *seq_num, struct pipe_resource **textures,
+                         int *width, int *height)
+{
+   struct android_surface *asurf = android_surface(nsurf);
+   struct winsys_handle handle;
+
+   if (!asurf->buf) {
+      if (!android_surface_dequeue_buffer(&asurf->base))
+         return FALSE;
+   }
+
+   if (textures) {
+      /* we have access to only the back buffer */
+      const enum native_attachment att = NATIVE_ATTACHMENT_BACK_LEFT;
+
+      if (native_attachment_mask_test(attachment_mask, att)) {
+         textures[att] = NULL;
+         pipe_resource_reference(&textures[att], asurf->res);
+      }
+   }
+
+   if (seq_num)
+      *seq_num = asurf->stamp;
+   if (width)
+      *width = asurf->buf->width;
+   if (height)
+      *height = asurf->buf->height;
+
+   return TRUE;
+}
+
+static void
+android_surface_wait(struct native_surface *nsurf)
+{
+}
+
+static void
+android_surface_destroy(struct native_surface *nsurf)
+{
+   struct android_surface *asurf = android_surface(nsurf);
+   int i;
+
+   if (asurf->buf)
+      android_surface_enqueue_buffer(&asurf->base);
+
+   for (i = 0; i < Elements(asurf->cache_handles); i++)
+      pipe_resource_reference(&asurf->cache_resources[i], NULL);
+
+   asurf->win->common.decRef(&asurf->win->common);
+
+   FREE(asurf);
+}
+
+static struct native_surface *
+android_display_create_window_surface(struct native_display *ndpy,
+                                      EGLNativeWindowType win,
+                                      const struct native_config *nconf)
+{
+   struct android_display *adpy = android_display(ndpy);
+   struct android_config *aconf = android_config(nconf);
+   struct android_surface *asurf;
+   enum pipe_format format;
+   int val;
+
+   if (win->common.magic != ANDROID_NATIVE_WINDOW_MAGIC) {
+      LOGE("invalid native window with magic 0x%x", win->common.magic);
+      return NULL;
+   }
+   if (win->query(win, NATIVE_WINDOW_FORMAT, &val)) {
+      LOGE("failed to query native window format");
+      return NULL;
+   }
+   format = get_pipe_format(val);
+   if (format != nconf->color_format) {
+      LOGW("native window format 0x%x != config format 0x%x",
+            format, nconf->color_format);
+      if (!adpy->base.screen->is_format_supported(adpy->base.screen,
+               format, PIPE_TEXTURE_2D, 0, PIPE_BIND_RENDER_TARGET)) {
+         LOGE("and the native window cannot be used as a render target");
+         return NULL;
+      }
+   }
+
+   asurf = CALLOC_STRUCT(android_surface);
+   if (!asurf)
+      return NULL;
+
+   asurf->adpy = adpy;
+   asurf->win = win;
+   asurf->win->common.incRef(&asurf->win->common);
+
+   /* request buffers that are for CPU access */
+   if (!adpy->use_drm) {
+      native_window_set_usage(asurf->win,
+            GRALLOC_USAGE_SW_READ_OFTEN | GRALLOC_USAGE_SW_WRITE_OFTEN);
+   }
+
+   asurf->base.destroy = android_surface_destroy;
+   asurf->base.present = android_surface_present;
+   asurf->base.validate = android_surface_validate;
+   asurf->base.wait = android_surface_wait;
+
+   return &asurf->base;
+}
+
+static boolean
+android_display_init_configs(struct native_display *ndpy)
+{
+   struct android_display *adpy = android_display(ndpy);
+   const int native_formats[] = {
+      PIXEL_FORMAT_RGBA_8888,
+      PIXEL_FORMAT_RGBX_8888,
+      PIXEL_FORMAT_RGB_888,
+      PIXEL_FORMAT_RGB_565,
+      PIXEL_FORMAT_BGRA_8888,
+      PIXEL_FORMAT_A_8
+   };
+   int i;
+
+   adpy->configs = (struct android_config *)
+      CALLOC(Elements(native_formats), sizeof(*adpy->configs));
+   if (!adpy->configs)
+      return FALSE;
+
+   for (i = 0; i < Elements(native_formats); i++) {
+      enum pipe_format color_format;
+      struct android_config *aconf;
+
+      color_format = get_pipe_format(native_formats[i]);
+      if (color_format == PIPE_FORMAT_NONE ||
+          !adpy->base.screen->is_format_supported(adpy->base.screen,
+               color_format, PIPE_TEXTURE_2D, 0, PIPE_BIND_RENDER_TARGET)) {
+         LOGI("skip unsupported native format 0x%x", native_formats[i]);
+         continue;
+      }
+
+      aconf = &adpy->configs[adpy->num_configs++];
+      /* only the back buffer */
+      aconf->base.buffer_mask = 1 << NATIVE_ATTACHMENT_BACK_LEFT;
+      aconf->base.color_format = color_format;
+      aconf->base.window_bit = TRUE;
+
+      aconf->base.native_visual_id = native_formats[i];
+      aconf->base.native_visual_type = native_formats[i];
+   }
+
+   return TRUE;
+}
+
+static boolean
+android_display_init_drm(struct native_display *ndpy)
+{
+   struct android_display *adpy = android_display(ndpy);
+   const hw_module_t *mod;
+   int fd, err;
+
+   /* get the authorized fd from gralloc */
+   err = hw_get_module(GRALLOC_HARDWARE_MODULE_ID, &mod);
+   if (!err) {
+      const gralloc_module_t *gr = (gralloc_module_t *) mod;
+
+      err = -EINVAL;
+      if (gr->perform)
+         err = gr->perform(gr, GRALLOC_MODULE_PERFORM_GET_DRM_FD, &fd);
+   }
+   if (!err && fd >= 0) {
+      adpy->base.screen =
+         adpy->event_handler->new_drm_screen(&adpy->base, NULL, fd);
+   }
+
+   if (adpy->base.screen) {
+      LOGI("using DRM screen");
+      return TRUE;
+   }
+   else {
+      LOGE("failed to create DRM screen");
+      return FALSE;
+   }
+}
+
+static boolean
+android_display_init_sw(struct native_display *ndpy)
+{
+   struct android_display *adpy = android_display(ndpy);
+   struct sw_winsys *ws;
+
+   ws = android_create_sw_winsys();
+   if (ws) {
+      adpy->base.screen =
+         adpy->event_handler->new_sw_screen(&adpy->base, ws);
+   }
+
+   if (adpy->base.screen) {
+      LOGI("using SW screen");
+      return TRUE;
+   }
+   else {
+      LOGE("failed to create SW screen");
+      return FALSE;
+   }
+}
+
+static boolean
+android_display_init_screen(struct native_display *ndpy)
+{
+   struct android_display *adpy = android_display(ndpy);
+
+   if (adpy->use_drm)
+      android_display_init_drm(&adpy->base);
+   else
+      android_display_init_sw(&adpy->base);
+
+   if (!adpy->base.screen)
+      return FALSE;
+
+   if (!android_display_init_configs(&adpy->base)) {
+      adpy->base.screen->destroy(adpy->base.screen);
+      adpy->base.screen = NULL;
+      return FALSE;
+   }
+
+   return TRUE;
+}
+
+static void
+android_display_destroy(struct native_display *ndpy)
+{
+   struct android_display *adpy = android_display(ndpy);
+
+   FREE(adpy->configs);
+   if (adpy->base.screen)
+      adpy->base.screen->destroy(adpy->base.screen);
+   FREE(adpy);
+}
+
+static const struct native_config **
+android_display_get_configs(struct native_display *ndpy, int *num_configs)
+{
+   struct android_display *adpy = android_display(ndpy);
+   const struct native_config **configs;
+   int i;
+
+   configs = (const struct native_config **)
+      MALLOC(adpy->num_configs * sizeof(*configs));
+   if (configs) {
+      for (i = 0; i < adpy->num_configs; i++)
+         configs[i] = (const struct native_config *) &adpy->configs[i];
+      if (num_configs)
+         *num_configs = adpy->num_configs;
+   }
+
+   return configs;
+}
+
+static int
+android_display_get_param(struct native_display *ndpy,
+                          enum native_param_type param)
+{
+   int val;
+
+   switch (param) {
+   default:
+      val = 0;
+      break;
+   }
+
+   return val;
+}
+
+static struct pipe_resource *
+android_display_import_buffer(struct native_display *ndpy,
+                              struct native_buffer *nbuf)
+{
+   struct android_display *adpy = android_display(ndpy);
+   struct android_native_buffer_t *abuf;
+   enum pipe_format format;
+   struct pipe_resource templ;
+
+   if (nbuf->type != NATIVE_BUFFER_ANDROID)
+      return NULL;
+
+   abuf = nbuf->u.android;
+
+   if (!abuf || abuf->common.magic != ANDROID_NATIVE_BUFFER_MAGIC ||
+       abuf->common.version != sizeof(*abuf)) {
+      LOGE("invalid android native buffer");
+      return NULL;
+   }
+
+   format = get_pipe_format(abuf->format);
+   if (format == PIPE_FORMAT_NONE)
+      return NULL;
+
+   memset(&templ, 0, sizeof(templ));
+   templ.target = PIPE_TEXTURE_2D;
+   templ.format = format;
+   /* assume for texturing only */
+   templ.bind = PIPE_BIND_SAMPLER_VIEW;
+   templ.width0 = abuf->width;
+   templ.height0 = abuf->height;
+   templ.depth0 = 1;
+   templ.array_size = 1;
+
+   return import_buffer(adpy, &templ, abuf);
+}
+
+static boolean
+android_display_export_buffer(struct native_display *ndpy,
+                              struct pipe_resource *res,
+                              struct native_buffer *nbuf)
+{
+   return FALSE;
+}
+
+static struct native_display_buffer android_display_buffer = {
+   android_display_import_buffer,
+   android_display_export_buffer
+};
+
+static struct android_display *
+android_display_create(const struct native_event_handler *event_handler,
+                       boolean use_sw)
+{
+   struct android_display *adpy;
+   char value[PROPERTY_VALUE_MAX];
+   boolean force_sw;
+
+   /* check if SW renderer is forced */
+   if (property_get("debug.mesa.software", value, NULL))
+      force_sw = (atoi(value) != 0);
+   else
+      force_sw = debug_get_bool_option("EGL_SOFTWARE", FALSE);
+   if (force_sw)
+      use_sw = TRUE;
+
+   adpy = CALLOC_STRUCT(android_display);
+   if (!adpy)
+      return NULL;
+
+   adpy->event_handler = event_handler;
+   adpy->use_drm = !use_sw;
+
+   adpy->base.init_screen = android_display_init_screen;
+   adpy->base.destroy = android_display_destroy;
+   adpy->base.get_param = android_display_get_param;
+   adpy->base.get_configs = android_display_get_configs;
+   adpy->base.create_window_surface = android_display_create_window_surface;
+
+   adpy->base.buffer = &android_display_buffer;
+
+   return adpy;
+}
+
+static const struct native_event_handler *android_event_handler;
+
+static struct native_display *
+native_create_display(void *dpy, boolean use_sw)
+{
+   struct android_display *adpy;
+
+   adpy = android_display_create(android_event_handler, use_sw);
+
+   return (adpy) ? &adpy->base : NULL;
+}
+
+static const struct native_platform android_platform = {
+   "Android", /* name */
+   native_create_display
+};
+
+}; /* namespace android */
+
+using namespace android;
+
+static void
+android_log(EGLint level, const char *msg)
+{
+   switch (level) {
+   case _EGL_DEBUG:
+      LOGD("%s", msg);
+      break;
+   case _EGL_INFO:
+      LOGI("%s", msg);
+      break;
+   case _EGL_WARNING:
+      LOGW("%s", msg);
+      break;
+   case _EGL_FATAL:
+      LOG_FATAL("%s", msg);
+      break;
+   default:
+      break;
+   }
+}
+
+const struct native_platform *
+native_get_android_platform(const struct native_event_handler *event_handler)
+{
+   android_event_handler = event_handler;
+   /* use Android logger */
+   _eglSetLogProc(android_log);
+
+   return &android_platform;
+}
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c
index 6649f02b244..b5e3d99b811 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d.c
@@ -130,6 +130,12 @@ egl_g3d_get_platform(_EGLDriver *drv, _EGLPlatformType plat)
          plat_name = "FBDEV";
 #ifdef HAVE_FBDEV_BACKEND
          nplat = native_get_fbdev_platform(&egl_g3d_native_event_handler);
+#endif
+         break;
+      case _EGL_PLATFORM_ANDROID:
+         plat_name = "Android";
+#ifdef HAVE_ANDROID_BACKEND
+         nplat = native_get_android_platform(&egl_g3d_native_event_handler);
 #endif
          break;
       default:
@@ -572,6 +578,11 @@ egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy)
    if (dpy->Platform == _EGL_PLATFORM_WAYLAND && gdpy->native->buffer)
       dpy->Extensions.MESA_drm_image = EGL_TRUE;
 
+#ifdef EGL_ANDROID_image_native_buffer
+   if (dpy->Platform == _EGL_PLATFORM_ANDROID && gdpy->native->buffer)
+      dpy->Extensions.ANDROID_image_native_buffer = EGL_TRUE;
+#endif
+
 #ifdef EGL_WL_bind_wayland_display
    if (gdpy->native->wayland_bufmgr)
       dpy->Extensions.WL_bind_wayland_display = EGL_TRUE;
diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_image.c b/src/gallium/state_trackers/egl/common/egl_g3d_image.c
index 7e9a29b0284..4d90c400319 100644
--- a/src/gallium/state_trackers/egl/common/egl_g3d_image.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d_image.c
@@ -202,6 +202,24 @@ egl_g3d_reference_wl_buffer(_EGLDisplay *dpy, struct wl_buffer *buffer,
 
 #endif /* EGL_WL_bind_wayland_display */
 
+#ifdef EGL_ANDROID_image_native_buffer
+
+static struct pipe_resource *
+egl_g3d_reference_android_native_buffer(_EGLDisplay *dpy,
+                                        struct android_native_buffer_t *buf)
+{
+   struct egl_g3d_display *gdpy = egl_g3d_display(dpy);
+   struct native_buffer nbuf;
+
+   memset(&nbuf, 0, sizeof(nbuf));
+   nbuf.type = NATIVE_BUFFER_ANDROID;
+   nbuf.u.android = buf;
+    
+   return gdpy->native->buffer->import_buffer(gdpy->native, &nbuf);
+}
+
+#endif /* EGL_ANDROID_image_native_buffer */
+
 _EGLImage *
 egl_g3d_create_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx,
                      EGLenum target, EGLClientBuffer buffer,
@@ -238,6 +256,12 @@ egl_g3d_create_image(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx,
       ptex = egl_g3d_reference_wl_buffer(dpy,
             (struct wl_buffer *) buffer, &gimg->base, attribs);
       break;
+#endif
+#ifdef EGL_ANDROID_image_native_buffer
+   case EGL_NATIVE_BUFFER_ANDROID:
+      ptex = egl_g3d_reference_android_native_buffer(dpy,
+            (struct android_native_buffer_t *) buffer);
+      break;
 #endif
    default:
       ptex = NULL;
diff --git a/src/gallium/state_trackers/egl/common/native.h b/src/gallium/state_trackers/egl/common/native.h
index fc50ee485fe..58593a489cd 100644
--- a/src/gallium/state_trackers/egl/common/native.h
+++ b/src/gallium/state_trackers/egl/common/native.h
@@ -293,6 +293,9 @@ native_get_drm_platform(const struct native_event_handler *event_handler);
 const struct native_platform *
 native_get_fbdev_platform(const struct native_event_handler *event_handler);
 
+const struct native_platform *
+native_get_android_platform(const struct native_event_handler *event_handler);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gallium/state_trackers/egl/common/native_buffer.h b/src/gallium/state_trackers/egl/common/native_buffer.h
index b8a66d17e12..503ed580b05 100644
--- a/src/gallium/state_trackers/egl/common/native_buffer.h
+++ b/src/gallium/state_trackers/egl/common/native_buffer.h
@@ -33,9 +33,11 @@
 #include "pipe/p_state.h"
 
 struct native_display;
+struct android_native_buffer_t;
 
 enum native_buffer_type {
    NATIVE_BUFFER_DRM,
+   NATIVE_BUFFER_ANDROID,
 
    NUM_NATIVE_BUFFERS
 };
@@ -50,6 +52,8 @@ struct native_buffer {
          unsigned handle; /**< the handle of the GEM object */
          unsigned stride;
       } drm;
+
+      struct android_native_buffer_t *android; /**< opaque native buffer */
    } u;
 };
 

From 4b2b0b9fb8d41e7a8df4cdc49d498fcfb99139df Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Thu, 18 Aug 2011 10:24:25 +0800
Subject: [PATCH 483/600] targets/egl-static: do not rely on libudev on Android

There is no libudev on Android.  Use DRM to get the PCI ID directly.

Reviewed-by: Benjamin Franzke <benjaminfranzke@googlemail.com>
---
 src/gallium/targets/egl-static/egl.c | 64 ++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/src/gallium/targets/egl-static/egl.c b/src/gallium/targets/egl-static/egl.c
index 568f5498dd4..6e5f6f30b11 100644
--- a/src/gallium/targets/egl-static/egl.c
+++ b/src/gallium/targets/egl-static/egl.c
@@ -109,6 +109,70 @@ out:
    return (*chip_id >= 0);
 }
 
+#elif defined(PIPE_OS_ANDROID)
+
+#include <xf86drm.h>
+/* for i915 */
+#include <i915_drm.h>
+/* for radeon */
+#include <radeon_drm.h>
+/* for util_strcmp */
+#include "util/u_string.h"
+
+static boolean
+drm_fd_get_pci_id(int fd, int *vendor_id, int *chip_id)
+{
+   drmVersionPtr version;
+
+   *chip_id = -1;
+
+   version = drmGetVersion(fd);
+   if (!version) {
+      _eglLog(_EGL_WARNING, "invalid drm fd");
+      return FALSE;
+   }
+   if (!version->name) {
+      _eglLog(_EGL_WARNING, "unable to determine the driver name");
+      drmFreeVersion(version);
+      return FALSE;
+   }
+
+   if (util_strcmp(version->name, "i915") == 0) {
+      struct drm_i915_getparam gp;
+      int ret;
+
+      *vendor_id = 0x8086;
+
+      memset(&gp, 0, sizeof(gp));
+      gp.param = I915_PARAM_CHIPSET_ID;
+      gp.value = chip_id;
+      ret = drmCommandWriteRead(fd, DRM_I915_GETPARAM, &gp, sizeof(gp));
+      if (ret) {
+         _eglLog(_EGL_WARNING, "failed to get param for i915");
+	 *chip_id = -1;
+      }
+   }
+   else if (util_strcmp(version->name, "radeon") == 0) {
+      struct drm_radeon_info info;
+      int ret;
+
+      *vendor_id = 0x1002;
+
+      memset(&info, 0, sizeof(info));
+      info.request = RADEON_INFO_DEVICE_ID;
+      info.value = (unsigned long) chip_id;
+      ret = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info));
+      if (ret) {
+         _eglLog(_EGL_WARNING, "failed to get info for radeon");
+	 *chip_id = -1;
+      }
+   }
+
+   drmFreeVersion(version);
+
+   return (*chip_id >= 0);
+}
+
 #else
 
 static boolean

From c9b21d986e6421d57c429f5ba8ca1fc8868e76dc Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Wed, 17 Aug 2011 11:07:01 +0800
Subject: [PATCH 484/600] android: build libGLES_mesa

This is the first step to integrate Mesa into Android(-x86) build
system.  You can git clone mesa under the external/ directory of Android
source tree and build Android with

 $ make BOARD_GPU_DRIVERS=swrast

It will build libGLES_mesa that will be loaded by Android runtime.

libGLES_mesa is still a stub in this commit.
---
 Android.common.mk | 48 +++++++++++++++++++++++++
 Android.mk        | 91 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 139 insertions(+)
 create mode 100644 Android.common.mk
 create mode 100644 Android.mk

diff --git a/Android.common.mk b/Android.common.mk
new file mode 100644
index 00000000000..83177a0cbe7
--- /dev/null
+++ b/Android.common.mk
@@ -0,0 +1,48 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_C_INCLUDES += \
+	$(MESA_TOP)/include
+
+LOCAL_CFLAGS += \
+	-DPTHREADS \
+	-fvisibility=hidden \
+	-Wno-sign-compare
+
+ifeq ($(strip $(MESA_ENABLE_ASM)),true)
+ifeq ($(TARGET_ARCH),x86)
+LOCAL_CFLAGS += \
+	-DUSE_X86_ASM
+endif
+endif
+
+LOCAL_CPPFLAGS += \
+	-Wno-error=non-virtual-dtor \
+	-Wno-non-virtual-dtor
+
+# uncomment to keep the debug symbols
+#LOCAL_STRIP_MODULE := false
+
+ifeq ($(strip $(LOCAL_MODULE_TAGS)),)
+LOCAL_MODULE_TAGS := optional
+endif
diff --git a/Android.mk b/Android.mk
new file mode 100644
index 00000000000..b03196ae3c5
--- /dev/null
+++ b/Android.mk
@@ -0,0 +1,91 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# BOARD_GPU_DRIVERS should be defined.  The valid values are
+#
+#   classic drivers:
+#   gallium drivers: swrast
+#
+# The main target is libGLES_mesa.  There is no classic drivers yet.
+
+MESA_TOP := $(call my-dir)
+MESA_COMMON_MK := $(MESA_TOP)/Android.common.mk
+MESA_PYTHON2 := python
+
+DRM_TOP := external/drm
+DRM_GRALLOC_TOP := hardware/drm_gralloc
+
+classic_drivers :=
+gallium_drivers := swrast
+
+MESA_GPU_DRIVERS := $(BOARD_GPU_DRIVERS)
+
+# warn about invalid drivers
+invalid_drivers := $(filter-out \
+	$(classic_drivers) $(gallium_drivers), $(MESA_GPU_DRIVERS))
+ifneq ($(invalid_drivers),)
+$(warning invalid GPU drivers: $(invalid_drivers))
+# tidy up
+MESA_GPU_DRIVERS := $(filter-out $(invalid_drivers), $(MESA_GPU_DRIVERS))
+endif
+
+# host and target must be the same arch to generate matypes.h
+ifeq ($(TARGET_ARCH),$(HOST_ARCH))
+MESA_ENABLE_ASM := true
+else
+MESA_ENABLE_ASM := false
+endif
+
+ifneq ($(filter $(classic_drivers), $(MESA_GPU_DRIVERS)),)
+MESA_BUILD_CLASSIC := true
+else
+MESA_BUILD_CLASSIC := false
+endif
+
+ifneq ($(filter $(gallium_drivers), $(MESA_GPU_DRIVERS)),)
+MESA_BUILD_GALLIUM := true
+else
+MESA_BUILD_GALLIUM := false
+endif
+
+ifneq ($(strip $(MESA_GPU_DRIVERS)),)
+
+# ---------------------------------------
+# Build libGLES_mesa
+# ---------------------------------------
+
+LOCAL_PATH := $(MESA_TOP)
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES :=
+LOCAL_CFLAGS :=
+LOCAL_C_INCLUDES :=
+
+LOCAL_MODULE := libGLES_mesa
+LOCAL_MODULE_PATH := $(TARGET_OUT_SHARED_LIBRARIES)/egl
+
+include $(MESA_COMMON_MK)
+include $(BUILD_SHARED_LIBRARY)
+
+endif # MESA_GPU_DRIVERS

From 688db6e8dc1507021cd84733ea379e97f0265351 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Wed, 17 Aug 2011 11:38:00 +0800
Subject: [PATCH 485/600] android: build gallium auxiliaries

This builds the static library libmesa_gallium from gallium auxiliaries.
---
 Android.mk                       |   9 ++
 src/gallium/Android.common.mk    |  32 ++++
 src/gallium/Android.mk           |  33 ++++
 src/gallium/auxiliary/Android.mk | 252 +++++++++++++++++++++++++++++++
 4 files changed, 326 insertions(+)
 create mode 100644 src/gallium/Android.common.mk
 create mode 100644 src/gallium/Android.mk
 create mode 100644 src/gallium/auxiliary/Android.mk

diff --git a/Android.mk b/Android.mk
index b03196ae3c5..c1ee38102e9 100644
--- a/Android.mk
+++ b/Android.mk
@@ -70,6 +70,12 @@ endif
 
 ifneq ($(strip $(MESA_GPU_DRIVERS)),)
 
+SUBDIRS :=
+
+ifeq ($(strip $(MESA_BUILD_GALLIUM)),true)
+SUBDIRS += src/gallium
+endif
+
 # ---------------------------------------
 # Build libGLES_mesa
 # ---------------------------------------
@@ -88,4 +94,7 @@ LOCAL_MODULE_PATH := $(TARGET_OUT_SHARED_LIBRARIES)/egl
 include $(MESA_COMMON_MK)
 include $(BUILD_SHARED_LIBRARY)
 
+mkfiles := $(patsubst %,$(MESA_TOP)/%/Android.mk,$(SUBDIRS))
+-include $(mkfiles)
+
 endif # MESA_GPU_DRIVERS
diff --git a/src/gallium/Android.common.mk b/src/gallium/Android.common.mk
new file mode 100644
index 00000000000..782510ff0f4
--- /dev/null
+++ b/src/gallium/Android.common.mk
@@ -0,0 +1,32 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# src/gallium/Android.common.mk
+
+LOCAL_C_INCLUDES += \
+	$(GALLIUM_TOP)/include \
+	$(GALLIUM_TOP)/auxiliary \
+	$(GALLIUM_TOP)/winsys \
+	$(GALLIUM_TOP)/drivers
+
+include $(MESA_COMMON_MK)
diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk
new file mode 100644
index 00000000000..9a2fd2a39c6
--- /dev/null
+++ b/src/gallium/Android.mk
@@ -0,0 +1,33 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# src/gallium/Android.mk
+
+GALLIUM_TOP := $(call my-dir)
+GALLIUM_COMMON_MK := $(GALLIUM_TOP)/Android.common.mk
+
+SUBDIRS := \
+	auxiliary
+
+mkfiles := $(patsubst %,$(GALLIUM_TOP)/%/Android.mk,$(SUBDIRS))
+include $(mkfiles)
diff --git a/src/gallium/auxiliary/Android.mk b/src/gallium/auxiliary/Android.mk
new file mode 100644
index 00000000000..e8390c1f93e
--- /dev/null
+++ b/src/gallium/auxiliary/Android.mk
@@ -0,0 +1,252 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+# from Makefile
+C_SOURCES = \
+	cso_cache/cso_cache.c \
+	cso_cache/cso_context.c \
+	cso_cache/cso_hash.c \
+	draw/draw_context.c \
+	draw/draw_fs.c \
+	draw/draw_gs.c \
+	draw/draw_pipe.c \
+	draw/draw_pipe_aaline.c \
+	draw/draw_pipe_aapoint.c \
+	draw/draw_pipe_clip.c \
+	draw/draw_pipe_cull.c \
+	draw/draw_pipe_flatshade.c \
+	draw/draw_pipe_offset.c \
+	draw/draw_pipe_pstipple.c \
+	draw/draw_pipe_stipple.c \
+	draw/draw_pipe_twoside.c \
+	draw/draw_pipe_unfilled.c \
+	draw/draw_pipe_util.c \
+	draw/draw_pipe_validate.c \
+	draw/draw_pipe_vbuf.c \
+	draw/draw_pipe_wide_line.c \
+	draw/draw_pipe_wide_point.c \
+	draw/draw_pt.c \
+	draw/draw_pt_emit.c \
+	draw/draw_pt_fetch.c \
+	draw/draw_pt_fetch_emit.c \
+	draw/draw_pt_fetch_shade_emit.c \
+	draw/draw_pt_fetch_shade_pipeline.c \
+	draw/draw_pt_post_vs.c \
+	draw/draw_pt_so_emit.c \
+	draw/draw_pt_util.c \
+	draw/draw_pt_vsplit.c \
+	draw/draw_vertex.c \
+	draw/draw_vs.c \
+	draw/draw_vs_aos.c \
+	draw/draw_vs_aos_io.c \
+	draw/draw_vs_aos_machine.c \
+	draw/draw_vs_exec.c \
+	draw/draw_vs_ppc.c \
+	draw/draw_vs_sse.c \
+	draw/draw_vs_variant.c \
+	indices/u_indices_gen.c \
+	indices/u_unfilled_gen.c \
+	os/os_misc.c \
+	os/os_stream.c \
+	os/os_stream_log.c \
+	os/os_stream_null.c \
+	os/os_stream_stdc.c \
+	os/os_stream_str.c \
+	os/os_time.c \
+	pipebuffer/pb_buffer_fenced.c \
+	pipebuffer/pb_buffer_malloc.c \
+	pipebuffer/pb_bufmgr_alt.c \
+	pipebuffer/pb_bufmgr_cache.c \
+	pipebuffer/pb_bufmgr_debug.c \
+	pipebuffer/pb_bufmgr_mm.c \
+	pipebuffer/pb_bufmgr_ondemand.c \
+	pipebuffer/pb_bufmgr_pool.c \
+	pipebuffer/pb_bufmgr_slab.c \
+	pipebuffer/pb_validate.c \
+	rbug/rbug_connection.c \
+	rbug/rbug_context.c \
+	rbug/rbug_core.c \
+	rbug/rbug_demarshal.c \
+	rbug/rbug_texture.c \
+	rbug/rbug_shader.c \
+	rtasm/rtasm_cpu.c \
+	rtasm/rtasm_execmem.c \
+	rtasm/rtasm_ppc.c \
+	rtasm/rtasm_ppc_spe.c \
+	rtasm/rtasm_x86sse.c \
+	tgsi/tgsi_build.c \
+	tgsi/tgsi_dump.c \
+	tgsi/tgsi_exec.c \
+	tgsi/tgsi_info.c \
+	tgsi/tgsi_iterate.c \
+	tgsi/tgsi_parse.c \
+	tgsi/tgsi_ppc.c \
+	tgsi/tgsi_sanity.c \
+	tgsi/tgsi_scan.c \
+	tgsi/tgsi_sse2.c \
+	tgsi/tgsi_text.c \
+	tgsi/tgsi_transform.c \
+	tgsi/tgsi_ureg.c \
+	tgsi/tgsi_util.c \
+	translate/translate.c \
+	translate/translate_cache.c \
+	translate/translate_generic.c \
+	translate/translate_sse.c \
+	util/u_debug.c \
+	util/u_debug_describe.c \
+	util/u_debug_refcnt.c \
+	util/u_debug_stack.c \
+	util/u_debug_symbol.c \
+	util/u_dump_defines.c \
+	util/u_dump_state.c \
+	util/u_bitmask.c \
+	util/u_blit.c \
+	util/u_blitter.c \
+	util/u_cache.c \
+	util/u_caps.c \
+	util/u_cpu_detect.c \
+	util/u_dl.c \
+	util/u_draw.c \
+	util/u_draw_quad.c \
+	util/u_format.c \
+	util/u_format_other.c \
+	util/u_format_latc.c \
+	util/u_format_s3tc.c \
+	util/u_format_rgtc.c \
+	util/u_format_srgb.c \
+	util/u_format_table.c \
+	util/u_format_tests.c \
+	util/u_format_yuv.c \
+	util/u_format_zs.c \
+	util/u_framebuffer.c \
+	util/u_gen_mipmap.c \
+	util/u_half.c \
+	util/u_handle_table.c \
+	util/u_hash.c \
+	util/u_hash_table.c \
+	util/u_index_modify.c \
+	util/u_keymap.c \
+	util/u_linear.c \
+	util/u_linkage.c \
+	util/u_network.c \
+	util/u_math.c \
+	util/u_mm.c \
+	util/u_pstipple.c \
+	util/u_rect.c \
+	util/u_ringbuffer.c \
+	util/u_sampler.c \
+	util/u_simple_shaders.c \
+	util/u_slab.c \
+	util/u_snprintf.c \
+	util/u_staging.c \
+	util/u_surface.c \
+	util/u_surfaces.c \
+	util/u_texture.c \
+	util/u_tile.c \
+	util/u_transfer.c \
+	util/u_resource.c \
+	util/u_upload_mgr.c \
+	util/u_vbuf_mgr.c \
+	vl/vl_csc.c \
+	vl/vl_compositor.c \
+	vl/vl_decoder.c \
+	vl/vl_mpeg12_decoder.c \
+	vl/vl_mpeg12_bitstream.c \
+	vl/vl_zscan.c \
+        vl/vl_idct.c \
+	vl/vl_mc.c \
+        vl/vl_vertex_buffers.c \
+        vl/vl_video_buffer.c
+
+GALLIVM_SOURCES = \
+        gallivm/lp_bld_arit.c \
+        gallivm/lp_bld_assert.c \
+        gallivm/lp_bld_bitarit.c \
+        gallivm/lp_bld_const.c \
+        gallivm/lp_bld_conv.c \
+        gallivm/lp_bld_flow.c \
+        gallivm/lp_bld_format_aos.c \
+        gallivm/lp_bld_format_soa.c \
+        gallivm/lp_bld_format_yuv.c \
+        gallivm/lp_bld_gather.c \
+        gallivm/lp_bld_init.c \
+        gallivm/lp_bld_intr.c \
+        gallivm/lp_bld_logic.c \
+        gallivm/lp_bld_pack.c \
+        gallivm/lp_bld_printf.c \
+        gallivm/lp_bld_quad.c \
+        gallivm/lp_bld_sample.c \
+        gallivm/lp_bld_sample_aos.c \
+        gallivm/lp_bld_sample_soa.c \
+        gallivm/lp_bld_struct.c \
+        gallivm/lp_bld_swizzle.c \
+        gallivm/lp_bld_tgsi_aos.c \
+        gallivm/lp_bld_tgsi_info.c \
+        gallivm/lp_bld_tgsi_soa.c \
+        gallivm/lp_bld_type.c \
+        draw/draw_llvm.c \
+        draw/draw_llvm_sample.c \
+        draw/draw_llvm_translate.c \
+        draw/draw_vs_llvm.c \
+        draw/draw_pt_fetch_shade_pipeline_llvm.c
+
+GALLIVM_CPP_SOURCES = \
+	gallivm/lp_bld_debug.cpp \
+	gallivm/lp_bld_misc.cpp
+
+GENERATED_SOURCES = \
+	indices/u_indices_gen.c \
+	indices/u_unfilled_gen.c \
+	util/u_format_srgb.c \
+	util/u_format_table.c \
+	util/u_half.c
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(filter-out $(GENERATED_SOURCES), $(C_SOURCES))
+
+LOCAL_C_INCLUDES := $(GALLIUM_TOP)/auxiliary/util
+
+LOCAL_MODULE := libmesa_gallium
+
+# generate sources
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+intermediates := $(call local-intermediates-dir)
+LOCAL_GENERATED_SOURCES := $(addprefix $(intermediates)/, $(GENERATED_SOURCES))
+
+$(LOCAL_GENERATED_SOURCES): PRIVATE_PYTHON := $(MESA_PYTHON2)
+$(LOCAL_GENERATED_SOURCES): PRIVATE_CUSTOM_TOOL = $(PRIVATE_PYTHON) $^ > $@
+
+$(intermediates)/indices/u_indices_gen.c \
+$(intermediates)/indices/u_unfilled_gen.c \
+$(intermediates)/util/u_format_srgb.c \
+$(intermediates)/util/u_half.c: $(intermediates)/%.c: $(LOCAL_PATH)/%.py
+	$(transform-generated-source)
+
+$(intermediates)/util/u_format_table.c: $(intermediates)/%.c: $(LOCAL_PATH)/%.py $(LOCAL_PATH)/util/u_format.csv
+	$(transform-generated-source)
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)

From 98345cf1b54f979fa12526ad89e40ded3ba4f542 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Wed, 17 Aug 2011 11:41:40 +0800
Subject: [PATCH 486/600] android: build st/egl with android backend

This builds the static library libmesa_st_egl from st/egl.
---
 src/gallium/Android.mk                    |  1 +
 src/gallium/state_trackers/egl/Android.mk | 54 +++++++++++++++++++++++
 2 files changed, 55 insertions(+)
 create mode 100644 src/gallium/state_trackers/egl/Android.mk

diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk
index 9a2fd2a39c6..bf89e1a5779 100644
--- a/src/gallium/Android.mk
+++ b/src/gallium/Android.mk
@@ -27,6 +27,7 @@ GALLIUM_TOP := $(call my-dir)
 GALLIUM_COMMON_MK := $(GALLIUM_TOP)/Android.common.mk
 
 SUBDIRS := \
+	state_trackers/egl \
 	auxiliary
 
 mkfiles := $(patsubst %,$(GALLIUM_TOP)/%/Android.mk,$(SUBDIRS))
diff --git a/src/gallium/state_trackers/egl/Android.mk b/src/gallium/state_trackers/egl/Android.mk
new file mode 100644
index 00000000000..e459bd4655e
--- /dev/null
+++ b/src/gallium/state_trackers/egl/Android.mk
@@ -0,0 +1,54 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+common_SOURCES := \
+	common/egl_g3d.c \
+	common/egl_g3d_api.c \
+	common/egl_g3d_image.c \
+	common/egl_g3d_st.c \
+	common/egl_g3d_sync.c \
+	common/native_helper.c
+
+android_SOURCES := \
+	android/native_android.cpp
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	$(common_SOURCES) \
+	$(android_SOURCES)
+
+LOCAL_CFLAGS := -DHAVE_ANDROID_BACKEND
+
+LOCAL_C_INCLUDES := \
+	$(GALLIUM_TOP)/state_trackers/egl \
+	$(GALLIUM_TOP)/winsys/sw \
+	$(MESA_TOP)/src/egl/main \
+	$(DRM_GRALLOC_TOP)
+
+LOCAL_MODULE := libmesa_st_egl
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)

From b38da5f0a16115b1574ae52582a4655bbe7045f1 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Fri, 19 Aug 2011 00:28:22 +0800
Subject: [PATCH 487/600] android: build targets/egl-static

This builds the static library libmesa_egl_gallium from
targets/egl-static.
---
 src/gallium/Android.mk                    |  1 +
 src/gallium/targets/egl-static/Android.mk | 52 +++++++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 src/gallium/targets/egl-static/Android.mk

diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk
index bf89e1a5779..3a813bb4d54 100644
--- a/src/gallium/Android.mk
+++ b/src/gallium/Android.mk
@@ -27,6 +27,7 @@ GALLIUM_TOP := $(call my-dir)
 GALLIUM_COMMON_MK := $(GALLIUM_TOP)/Android.common.mk
 
 SUBDIRS := \
+	targets/egl-static \
 	state_trackers/egl \
 	auxiliary
 
diff --git a/src/gallium/targets/egl-static/Android.mk b/src/gallium/targets/egl-static/Android.mk
new file mode 100644
index 00000000000..8a65585b72b
--- /dev/null
+++ b/src/gallium/targets/egl-static/Android.mk
@@ -0,0 +1,52 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	egl.c \
+	egl_pipe.c \
+	egl_st.c
+
+LOCAL_CFLAGS := \
+	-DFEATURE_ES1=1 \
+	-DFEATURE_ES2=1 \
+	-D_EGL_MAIN=_eglBuiltInDriverGALLIUM
+
+LOCAL_C_INCLUDES := \
+	$(GALLIUM_TOP)/state_trackers/vega \
+	$(GALLIUM_TOP)/state_trackers/egl \
+	$(MESA_TOP)/src/egl/main \
+	$(MESA_TOP)/src/mesa \
+	$(DRM_TOP)/include/drm \
+	$(DRM_TOP)
+
+# swrast
+LOCAL_CFLAGS += -DGALLIUM_SOFTPIPE
+
+LOCAL_MODULE := libmesa_egl_gallium
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)

From 15576344f706a8ec850bdfcfa005917a7a7597df Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Wed, 17 Aug 2011 11:47:42 +0800
Subject: [PATCH 488/600] android: build android sw winsys

This builds the static library libmesa_winsys_sw_android from winsys/sw.
---
 src/gallium/Android.mk                   |  3 +++
 src/gallium/winsys/sw/android/Android.mk | 34 ++++++++++++++++++++++++
 2 files changed, 37 insertions(+)
 create mode 100644 src/gallium/winsys/sw/android/Android.mk

diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk
index 3a813bb4d54..d2697c5ca6b 100644
--- a/src/gallium/Android.mk
+++ b/src/gallium/Android.mk
@@ -31,5 +31,8 @@ SUBDIRS := \
 	state_trackers/egl \
 	auxiliary
 
+# swrast
+SUBDIRS += winsys/sw/android
+
 mkfiles := $(patsubst %,$(GALLIUM_TOP)/%/Android.mk,$(SUBDIRS))
 include $(mkfiles)
diff --git a/src/gallium/winsys/sw/android/Android.mk b/src/gallium/winsys/sw/android/Android.mk
new file mode 100644
index 00000000000..4fb2715a56c
--- /dev/null
+++ b/src/gallium/winsys/sw/android/Android.mk
@@ -0,0 +1,34 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	android_sw_winsys.cpp
+
+LOCAL_MODULE := libmesa_winsys_sw_android
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)

From faf9d580f5ee4fd364f366ba50cfc99f4b31b04f Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Wed, 17 Aug 2011 11:51:08 +0800
Subject: [PATCH 489/600] android: build softpipe

This builds the static library libmesa_pipe_softpipe from softpipe.
---
 src/gallium/Android.mk                  |  2 +-
 src/gallium/drivers/softpipe/Android.mk | 67 +++++++++++++++++++++++++
 2 files changed, 68 insertions(+), 1 deletion(-)
 create mode 100644 src/gallium/drivers/softpipe/Android.mk

diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk
index d2697c5ca6b..bcbabba9db9 100644
--- a/src/gallium/Android.mk
+++ b/src/gallium/Android.mk
@@ -32,7 +32,7 @@ SUBDIRS := \
 	auxiliary
 
 # swrast
-SUBDIRS += winsys/sw/android
+SUBDIRS += winsys/sw/android drivers/softpipe
 
 mkfiles := $(patsubst %,$(GALLIUM_TOP)/%/Android.mk,$(SUBDIRS))
 include $(mkfiles)
diff --git a/src/gallium/drivers/softpipe/Android.mk b/src/gallium/drivers/softpipe/Android.mk
new file mode 100644
index 00000000000..d198fa5d0f2
--- /dev/null
+++ b/src/gallium/drivers/softpipe/Android.mk
@@ -0,0 +1,67 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+# from Makefile
+C_SOURCES = \
+	sp_fs_exec.c \
+	sp_fs_sse.c \
+	sp_clear.c \
+	sp_fence.c \
+	sp_flush.c \
+	sp_query.c \
+	sp_context.c \
+	sp_draw_arrays.c \
+	sp_prim_vbuf.c \
+	sp_quad_pipe.c \
+	sp_quad_stipple.c \
+	sp_quad_depth_test.c \
+	sp_quad_fs.c \
+	sp_quad_blend.c \
+	sp_screen.c \
+        sp_setup.c \
+	sp_state_blend.c \
+	sp_state_clip.c \
+	sp_state_derived.c \
+	sp_state_sampler.c \
+	sp_state_shader.c \
+	sp_state_so.c \
+	sp_state_rasterizer.c \
+	sp_state_surface.c \
+	sp_state_vertex.c \
+	sp_texture.c \
+	sp_tex_sample.c \
+	sp_tex_tile_cache.c \
+	sp_tile_cache.c \
+	sp_surface.c
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	$(C_SOURCES)
+
+LOCAL_MODULE := libmesa_pipe_softpipe
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)

From 2a77dc0c0d6e36b64867ff21467aa7d86722395f Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Wed, 17 Aug 2011 11:24:11 +0800
Subject: [PATCH 490/600] android: build core EGL

This builds the static library libmesa_egl from core EGL.

Reviewed-by: Chad Versace <chad@chad-versace.us>
---
 Android.mk              |  5 ++--
 src/egl/main/Android.mk | 64 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 2 deletions(-)
 create mode 100644 src/egl/main/Android.mk

diff --git a/Android.mk b/Android.mk
index c1ee38102e9..4178ea2f36e 100644
--- a/Android.mk
+++ b/Android.mk
@@ -70,7 +70,8 @@ endif
 
 ifneq ($(strip $(MESA_GPU_DRIVERS)),)
 
-SUBDIRS :=
+SUBDIRS := \
+	src/egl/main
 
 ifeq ($(strip $(MESA_BUILD_GALLIUM)),true)
 SUBDIRS += src/gallium
@@ -95,6 +96,6 @@ include $(MESA_COMMON_MK)
 include $(BUILD_SHARED_LIBRARY)
 
 mkfiles := $(patsubst %,$(MESA_TOP)/%/Android.mk,$(SUBDIRS))
--include $(mkfiles)
+include $(mkfiles)
 
 endif # MESA_GPU_DRIVERS
diff --git a/src/egl/main/Android.mk b/src/egl/main/Android.mk
new file mode 100644
index 00000000000..25a7c657676
--- /dev/null
+++ b/src/egl/main/Android.mk
@@ -0,0 +1,64 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# Android.mk for core EGL
+
+LOCAL_PATH := $(call my-dir)
+
+# from Makefile
+SOURCES = \
+	eglapi.c \
+	eglarray.c \
+	eglconfig.c \
+	eglcontext.c \
+	eglcurrent.c \
+	egldisplay.c \
+	egldriver.c \
+	eglfallbacks.c \
+	eglglobals.c \
+	eglimage.c \
+	egllog.c \
+	eglmisc.c \
+	eglmode.c \
+	eglscreen.c \
+	eglstring.c \
+	eglsurface.c \
+	eglsync.c
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(SOURCES)
+
+LOCAL_CFLAGS := \
+	-D_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_ANDROID \
+	-D_EGL_DRIVER_SEARCH_DIR=\"/system/lib/egl\" \
+	-D_EGL_OS_UNIX=1
+
+ifeq ($(strip $(MESA_BUILD_GALLIUM)),true)
+LOCAL_CFLAGS += -D_EGL_BUILT_IN_DRIVER_GALLIUM
+endif
+
+LOCAL_MODULE := libmesa_egl
+
+include $(MESA_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)

From ee40f18054088dad8e1d1eca2c44204576813d6b Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Wed, 17 Aug 2011 11:31:36 +0800
Subject: [PATCH 491/600] android: build core mesa

This builds the static library libmesa_st_mesa from core mesa.

Acked-by: Chad Versace <chad@chad-versace.us>
---
 Android.mk              |   1 +
 src/mesa/Android.gen.mk | 131 ++++++++++++++++++++++++++++++++++++++++
 src/mesa/Android.mk     |  83 +++++++++++++++++++++++++
 3 files changed, 215 insertions(+)
 create mode 100644 src/mesa/Android.gen.mk
 create mode 100644 src/mesa/Android.mk

diff --git a/Android.mk b/Android.mk
index 4178ea2f36e..0d9475fa9a8 100644
--- a/Android.mk
+++ b/Android.mk
@@ -71,6 +71,7 @@ endif
 ifneq ($(strip $(MESA_GPU_DRIVERS)),)
 
 SUBDIRS := \
+	src/mesa \
 	src/egl/main
 
 ifeq ($(strip $(MESA_BUILD_GALLIUM)),true)
diff --git a/src/mesa/Android.gen.mk b/src/mesa/Android.gen.mk
new file mode 100644
index 00000000000..2a08184aee6
--- /dev/null
+++ b/src/mesa/Android.gen.mk
@@ -0,0 +1,131 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# included by core mesa Android.mk for source generation
+
+ifeq ($(LOCAL_MODULE_CLASS),)
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+endif
+
+intermediates := $(call local-intermediates-dir)
+
+sources := \
+	main/api_exec_es1.c \
+	main/api_exec_es1_dispatch.h \
+	main/api_exec_es1_remap_helper.h \
+	main/api_exec_es2.c \
+	main/api_exec_es2_dispatch.h \
+	main/api_exec_es2_remap_helper.h \
+	program/lex.yy.c \
+	program/program_parse.tab.c
+
+LOCAL_SRC_FILES := $(filter-out $(sources), $(LOCAL_SRC_FILES))
+
+LOCAL_C_INCLUDES += $(intermediates)/main
+
+ifeq ($(strip $(MESA_ENABLE_ASM)),true)
+ifeq ($(TARGET_ARCH),x86)
+sources += x86/matypes.h
+LOCAL_C_INCLUDES += $(intermediates)/x86
+endif
+endif
+
+sources += main/git_sha1.h
+
+sources := $(addprefix $(intermediates)/, $(sources))
+LOCAL_GENERATED_SOURCES += $(sources)
+
+glapi := $(MESA_TOP)/src/mapi/glapi/gen
+
+es_src_deps := \
+	$(LOCAL_PATH)/main/APIspec.xml \
+	$(LOCAL_PATH)/main/es_generator.py \
+	$(LOCAL_PATH)/main/APIspecutil.py \
+	$(LOCAL_PATH)/main/APIspec.py
+
+es_hdr_deps := \
+	$(wildcard $(glapi)/*.py) \
+	$(wildcard $(glapi)/*.xml)
+
+define es-gen
+	@mkdir -p $(dir $@)
+	@echo "Gen ES: $(PRIVATE_MODULE) <= $(notdir $(@))"
+	$(hide) $(PRIVATE_SCRIPT) $(1) $(PRIVATE_XML) > $@
+endef
+
+define local-l-to-c
+	@mkdir -p $(dir $@)
+	@echo "Mesa Lex: $(PRIVATE_MODULE) <= $<"
+	$(hide) $(LEX) -o$@ $<
+endef
+
+define local-y-to-c-and-h
+	@mkdir -p $(dir $@)
+	@echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<"
+	$(hide) $(YACC) -o $@ $<
+endef
+
+$(intermediates)/main/api_exec_%.c: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/main/es_generator.py
+$(intermediates)/main/api_exec_%.c: PRIVATE_XML := -S $(LOCAL_PATH)/main/APIspec.xml
+$(intermediates)/main/api_exec_%_dispatch.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(glapi)/gl_table.py
+$(intermediates)/main/api_exec_%_dispatch.h: PRIVATE_XML := -f $(glapi)/gl_and_es_API.xml
+$(intermediates)/main/api_exec_%_remap_helper.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(glapi)/remap_helper.py
+$(intermediates)/main/api_exec_%_remap_helper.h: PRIVATE_XML := -f $(glapi)/gl_and_es_API.xml
+
+$(intermediates)/main/api_exec_es1.c: $(es_src_deps)
+	$(call es-gen,-V GLES1.1)
+
+$(intermediates)/main/api_exec_es2.c: $(es_src_deps)
+	$(call es-gen,-V GLES2.0)
+
+$(intermediates)/main/api_exec_%_dispatch.h: $(es_hdr_deps)
+	$(call es-gen, -c $* -m remap_table)
+
+$(intermediates)/main/api_exec_%_remap_helper.h: $(es_hdr_deps)
+	$(call es-gen, -c $*)
+
+$(intermediates)/program/program_parse.tab.c: $(LOCAL_PATH)/program/program_parse.y
+	$(local-y-to-c-and-h)
+
+$(intermediates)/program/lex.yy.c: $(LOCAL_PATH)/program/program_lexer.l
+	$(local-l-to-c)
+
+$(intermediates)/main/git_sha1.h:
+	@mkdir -p $(dir $@)
+	@echo "GIT-SHA1: $(PRIVATE_MODULE) <= git"
+	$(hide) touch $@
+	$(hide) if which git > /dev/null; then \
+			git --git-dir $(PRIVATE_PATH)/../../.git log -n 1 --oneline | \
+			sed 's/^\([^ ]*\) .*/#define MESA_GIT_SHA1 "git-\1"/' \
+			> $@; \
+		fi
+
+matypes_deps := \
+	$(BUILD_OUT_EXECUTABLES)/mesa_gen_matypes$(BUILD_EXECUTABLE_SUFFIX) \
+	$(LOCAL_PATH)/main/mtypes.h \
+	$(LOCAL_PATH)/tnl/t_context.h
+
+$(intermediates)/x86/matypes.h: $(matypes_deps) 
+	@mkdir -p $(dir $@)
+	@echo "MATYPES: $(PRIVATE_MODULE) <= $(notdir $@)"
+	$(hide) $< > $@
diff --git a/src/mesa/Android.mk b/src/mesa/Android.mk
new file mode 100644
index 00000000000..e32fabed49e
--- /dev/null
+++ b/src/mesa/Android.mk
@@ -0,0 +1,83 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# Android.mk for core mesa
+
+LOCAL_PATH := $(call my-dir)
+
+include $(LOCAL_PATH)/sources.mak
+
+common_CFLAGS := \
+	-DFEATURE_ES1=1 \
+	-DFEATURE_ES2=1
+
+common_C_INCLUDES := \
+	$(MESA_TOP)/src/mapi \
+	$(MESA_TOP)/src/glsl
+
+common_ASM :=
+
+# ---------------------------------------
+# Build mesa_gen_matypes for host
+# ---------------------------------------
+
+ifeq ($(strip $(MESA_ENABLE_ASM)),true)
+ifeq ($(TARGET_ARCH),x86)
+common_ASM += $(X86_SOURCES)
+
+include $(CLEAR_VARS)
+LOCAL_SRC_FILES := x86/gen_matypes.c
+LOCAL_CFLAGS := $(common_CFLAGS)
+LOCAL_C_INCLUDES := $(common_C_INCLUDES)
+LOCAL_MODULE := mesa_gen_matypes
+include $(MESA_COMMON_MK)
+include $(BUILD_HOST_EXECUTABLE)
+
+endif # x86
+endif # MESA_ENABLE_ASM
+
+# ---------------------------------------
+# Build libmesa_st_mesa
+# ---------------------------------------
+
+ifeq ($(strip $(MESA_BUILD_GALLIUM)),true)
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	$(MESA_GALLIUM_SOURCES) \
+	$(MESA_GALLIUM_CXX_SOURCES) \
+	$(common_ASM)
+
+LOCAL_CFLAGS := $(common_CFLAGS)
+
+LOCAL_C_INCLUDES := \
+	$(common_C_INCLUDES) \
+	$(MESA_TOP)/src/gallium/include \
+	$(MESA_TOP)/src/gallium/auxiliary
+
+LOCAL_MODULE := libmesa_st_mesa
+
+include $(LOCAL_PATH)/Android.gen.mk
+include $(MESA_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
+endif # MESA_BUILD_GALLIUM

From b81b82df955ad19eec55fa593b533673f083f593 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Wed, 17 Aug 2011 11:56:43 +0800
Subject: [PATCH 492/600] android: build glsl

This builds the static library libmesa_glsl and executable glsl_compiler
from glsl.  glsl_compiler is only installed for engineering build.

Reviewed-by: Chad Versace <chad@chad-versace.us>
---
 Android.mk              |   1 +
 src/glsl/Android.gen.mk |  98 +++++++++++++++++++++++
 src/glsl/Android.mk     | 171 ++++++++++++++++++++++++++++++++++++++++
 src/mesa/Android.mk     |  32 ++++++++
 4 files changed, 302 insertions(+)
 create mode 100644 src/glsl/Android.gen.mk
 create mode 100644 src/glsl/Android.mk

diff --git a/Android.mk b/Android.mk
index 0d9475fa9a8..def8ec2de75 100644
--- a/Android.mk
+++ b/Android.mk
@@ -71,6 +71,7 @@ endif
 ifneq ($(strip $(MESA_GPU_DRIVERS)),)
 
 SUBDIRS := \
+	src/glsl \
 	src/mesa \
 	src/egl/main
 
diff --git a/src/glsl/Android.gen.mk b/src/glsl/Android.gen.mk
new file mode 100644
index 00000000000..e4ccb7291ef
--- /dev/null
+++ b/src/glsl/Android.gen.mk
@@ -0,0 +1,98 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# included by glsl Android.mk for source generation
+
+ifeq ($(LOCAL_MODULE_CLASS),)
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+endif
+
+intermediates := $(call local-intermediates-dir)
+
+sources := \
+	glsl_lexer.cpp \
+	glsl_parser.cpp \
+	glcpp/glcpp-lex.c \
+	glcpp/glcpp-parse.c
+
+ifneq ($(LOCAL_IS_HOST_MODULE),true)
+sources += builtin_function.cpp
+endif
+
+LOCAL_SRC_FILES := $(filter-out $(sources), $(LOCAL_SRC_FILES))
+
+LOCAL_C_INCLUDES += $(intermediates) $(intermediates)/glcpp $(MESA_TOP)/src/glsl/glcpp
+
+sources := $(addprefix $(intermediates)/, $(sources))
+LOCAL_GENERATED_SOURCES += $(sources)
+
+define local-l-or-ll-to-c-or-cpp
+	@mkdir -p $(dir $@)
+	@echo "Mesa Lex: $(PRIVATE_MODULE) <= $<"
+	$(hide) $(LEX) --nounistd -o$@ $<
+endef
+
+define local-y-to-c-and-h
+	@mkdir -p $(dir $@)
+	@echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<"
+	$(hide) $(YACC) -o $@ $<
+endef
+
+define local-yy-to-cpp-and-h
+	@mkdir -p $(dir $@)
+	@echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<"
+	$(hide) $(YACC) -p "_mesa_glsl_" -o $@ $<
+	touch $(@:$1=$(YACC_HEADER_SUFFIX))
+	echo '#ifndef '$(@F:$1=_h) > $(@:$1=.h)
+	echo '#define '$(@F:$1=_h) >> $(@:$1=.h)
+	cat $(@:$1=$(YACC_HEADER_SUFFIX)) >> $(@:$1=.h)
+	echo '#endif' >> $(@:$1=.h)
+	rm -f $(@:$1=$(YACC_HEADER_SUFFIX))
+endef
+
+$(intermediates)/glsl_lexer.cpp: $(LOCAL_PATH)/glsl_lexer.ll
+	$(call local-l-or-ll-to-c-or-cpp)
+
+$(intermediates)/glsl_parser.cpp: $(LOCAL_PATH)/glsl_parser.yy
+	$(call local-yy-to-cpp-and-h,.cpp)
+
+$(intermediates)/glcpp/glcpp-lex.c: $(LOCAL_PATH)/glcpp/glcpp-lex.l
+	$(call local-l-or-ll-to-c-or-cpp)
+
+$(intermediates)/glcpp/glcpp-parse.c: $(LOCAL_PATH)/glcpp/glcpp-parse.y
+	$(call local-y-to-c-and-h)
+
+BUILTIN_COMPILER := $(BUILD_OUT_EXECUTABLES)/mesa_builtin_compiler$(BUILD_EXECUTABLE_SUFFIX)
+
+builtin_function_deps := \
+	$(LOCAL_PATH)/builtins/tools/generate_builtins.py \
+	$(LOCAL_PATH)/builtins/tools/texture_builtins.py \
+	$(BUILTIN_COMPILER) \
+	$(wildcard $(LOCAL_PATH)/builtins/profiles/*) \
+       	$(wildcard $(LOCAL_PATH)/builtins/ir/*)
+
+$(intermediates)/builtin_function.cpp: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/builtins/tools/generate_builtins.py
+$(intermediates)/builtin_function.cpp: $(builtin_function_deps)
+	@mkdir -p $(dir $@)
+	@echo "Gen GLSL: $(PRIVATE_MODULE) <= $(notdir $@)"
+	$(hide) $(PRIVATE_SCRIPT) $(BUILTIN_COMPILER) > $@ || rm -f $@
diff --git a/src/glsl/Android.mk b/src/glsl/Android.mk
new file mode 100644
index 00000000000..d0b3ff3becf
--- /dev/null
+++ b/src/glsl/Android.mk
@@ -0,0 +1,171 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# Android.mk for glsl
+
+LOCAL_PATH := $(call my-dir)
+
+# from Makefile
+LIBGLCPP_SOURCES = \
+	glcpp/glcpp-lex.c \
+	glcpp/glcpp-parse.c \
+	glcpp/pp.c
+
+C_SOURCES = \
+	strtod.c \
+	ralloc.c \
+	$(LIBGLCPP_SOURCES)
+
+CXX_SOURCES = \
+	ast_expr.cpp \
+	ast_function.cpp \
+	ast_to_hir.cpp \
+	ast_type.cpp \
+	glsl_lexer.cpp \
+	glsl_parser.cpp \
+	glsl_parser_extras.cpp \
+	glsl_types.cpp \
+	glsl_symbol_table.cpp \
+	hir_field_selection.cpp \
+	ir_basic_block.cpp \
+	ir_clone.cpp \
+	ir_constant_expression.cpp \
+	ir.cpp \
+	ir_expression_flattening.cpp \
+	ir_function_can_inline.cpp \
+	ir_function_detect_recursion.cpp \
+	ir_function.cpp \
+	ir_hierarchical_visitor.cpp \
+	ir_hv_accept.cpp \
+	ir_import_prototypes.cpp \
+	ir_print_visitor.cpp \
+	ir_reader.cpp \
+	ir_rvalue_visitor.cpp \
+	ir_set_program_inouts.cpp \
+	ir_validate.cpp \
+	ir_variable.cpp \
+	ir_variable_refcount.cpp \
+	linker.cpp \
+	link_functions.cpp \
+	loop_analysis.cpp \
+	loop_controls.cpp \
+	loop_unroll.cpp \
+	lower_discard.cpp \
+	lower_if_to_cond_assign.cpp \
+	lower_instructions.cpp \
+	lower_jumps.cpp \
+	lower_mat_op_to_vec.cpp \
+	lower_noise.cpp \
+	lower_texture_projection.cpp \
+	lower_variable_index_to_cond_assign.cpp \
+	lower_vec_index_to_cond_assign.cpp \
+	lower_vec_index_to_swizzle.cpp \
+	lower_vector.cpp \
+	opt_algebraic.cpp \
+	opt_constant_folding.cpp \
+	opt_constant_propagation.cpp \
+	opt_constant_variable.cpp \
+	opt_copy_propagation.cpp \
+	opt_copy_propagation_elements.cpp \
+	opt_dead_code.cpp \
+	opt_dead_code_local.cpp \
+	opt_dead_functions.cpp \
+	opt_discard_simplification.cpp \
+	opt_function_inlining.cpp \
+	opt_if_simplification.cpp \
+	opt_noop_swizzle.cpp \
+	opt_redundant_jumps.cpp \
+	opt_structure_splitting.cpp \
+	opt_swizzle_swizzle.cpp \
+	opt_tree_grafting.cpp \
+	s_expression.cpp
+
+# ---------------------------------------
+# Build libmesa_glsl
+# ---------------------------------------
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	$(C_SOURCES) \
+	$(CXX_SOURCES) \
+	builtin_function.cpp
+
+LOCAL_C_INCLUDES := \
+	$(MESA_TOP)/src/mapi \
+	$(MESA_TOP)/src/mesa
+
+LOCAL_MODULE := libmesa_glsl
+
+include $(LOCAL_PATH)/Android.gen.mk
+include $(MESA_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
+
+# ---------------------------------------
+# Build mesa_builtin_compiler for host
+# ---------------------------------------
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	$(C_SOURCES) \
+	$(CXX_SOURCES) \
+	builtin_stubs.cpp \
+	main.cpp \
+	standalone_scaffolding.cpp
+
+LOCAL_C_INCLUDES := \
+	$(MESA_TOP)/src/mapi \
+	$(MESA_TOP)/src/mesa
+
+LOCAL_STATIC_LIBRARIES := libmesa_glsl_utils
+
+LOCAL_MODULE := mesa_builtin_compiler
+
+LOCAL_MODULE_CLASS := EXECUTABLES
+LOCAL_IS_HOST_MODULE := true
+include $(LOCAL_PATH)/Android.gen.mk
+include $(MESA_COMMON_MK)
+include $(BUILD_HOST_EXECUTABLE)
+
+# ---------------------------------------
+# Build glsl_compiler
+# ---------------------------------------
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	main.cpp \
+	standalone_scaffolding.cpp
+
+LOCAL_C_INCLUDES := \
+	$(MESA_TOP)/src/mapi \
+	$(MESA_TOP)/src/mesa
+
+LOCAL_STATIC_LIBRARIES := libmesa_glsl libmesa_glsl_utils
+
+LOCAL_MODULE_TAGS := eng
+LOCAL_MODULE := glsl_compiler
+
+include $(MESA_COMMON_MK)
+include $(BUILD_EXECUTABLE)
diff --git a/src/mesa/Android.mk b/src/mesa/Android.mk
index e32fabed49e..67808d491ac 100644
--- a/src/mesa/Android.mk
+++ b/src/mesa/Android.mk
@@ -81,3 +81,35 @@ include $(LOCAL_PATH)/Android.gen.mk
 include $(MESA_COMMON_MK)
 include $(BUILD_STATIC_LIBRARY)
 endif # MESA_BUILD_GALLIUM
+
+# ---------------------------------------
+# Build libmesa_glsl_utils
+#
+# It is used to avoid circular dependency between core mesa and glsl.
+# ---------------------------------------
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	program/hash_table.c \
+	program/symbol_table.c
+
+LOCAL_MODULE := libmesa_glsl_utils
+
+include $(MESA_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
+
+# ---------------------------------------
+# Build libmesa_glsl_utils for host
+# ---------------------------------------
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	program/hash_table.c \
+	program/symbol_table.c
+
+LOCAL_MODULE := libmesa_glsl_utils
+
+include $(MESA_COMMON_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)

From ee41fc898d7c362d6f34489895f4daed745d7166 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Wed, 17 Aug 2011 12:10:12 +0800
Subject: [PATCH 493/600] android: build shared glapi

This builds the shared library libglapi from shared glapi.

Reviewed-by: Chad Versace <chad@chad-versace.us>
---
 Android.mk          |  1 +
 src/mapi/Android.mk | 60 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+)
 create mode 100644 src/mapi/Android.mk

diff --git a/Android.mk b/Android.mk
index def8ec2de75..f585575097a 100644
--- a/Android.mk
+++ b/Android.mk
@@ -71,6 +71,7 @@ endif
 ifneq ($(strip $(MESA_GPU_DRIVERS)),)
 
 SUBDIRS := \
+	src/mapi \
 	src/glsl \
 	src/mesa \
 	src/egl/main
diff --git a/src/mapi/Android.mk b/src/mapi/Android.mk
new file mode 100644
index 00000000000..0d09ee13f99
--- /dev/null
+++ b/src/mapi/Android.mk
@@ -0,0 +1,60 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# Android.mk for glapi
+
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+include $(LOCAL_PATH)/mapi/sources.mak
+LOCAL_SRC_FILES := $(addprefix mapi/, $(MAPI_GLAPI_SOURCES))
+
+LOCAL_CFLAGS := \
+	-DMAPI_MODE_GLAPI \
+	-DMAPI_ABI_HEADER=\"shared-glapi/glapi_mapi_tmp.h\"
+
+LOCAL_C_INCLUDES := \
+	$(MESA_TOP)/src/mapi
+
+LOCAL_MODULE := libglapi
+
+LOCAL_MODULE_CLASS := SHARED_LIBRARIES
+intermediates := $(call local-intermediates-dir)
+mapi_abi_header := $(intermediates)/shared-glapi/glapi_mapi_tmp.h
+LOCAL_GENERATED_SOURCES := $(mapi_abi_header)
+
+mapi_abi_deps := \
+	$(wildcard $(LOCAL_PATH)/glapi/gen/*.py) \
+	$(wildcard $(LOCAL_PATH)/glapi/gen/*.xml) \
+	$(LOCAL_PATH)/mapi/mapi_abi.py
+
+$(mapi_abi_header): PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/mapi/mapi_abi.py
+$(mapi_abi_header): PRIVATE_APIXML := $(LOCAL_PATH)/glapi/gen/gl_and_es_API.xml
+$(mapi_abi_header): $(mapi_abi_deps)
+	@mkdir -p $(dir $@)
+	@echo "Gen GLAPI: $(PRIVATE_MODULE) <= $(notdir $@)"
+	$(hide) $(PRIVATE_SCRIPT) --printer shared-glapi --mode lib $(PRIVATE_APIXML) > $@
+
+include $(MESA_COMMON_MK)
+include $(BUILD_SHARED_LIBRARY)

From 327de226ae0b8b7a5ec4d327c21cc85d84ce09cb Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Wed, 17 Aug 2011 12:19:23 +0800
Subject: [PATCH 494/600] android: make libGLES_mesa real

libGLES_mesa with swrast should link in these libraries

  libmesa_egl
  libmesa_egl_gallium
  libmesa_st_egl
  libmesa_st_mesa
  libmesa_glsl
  libmesa_glsl_utils
  libmesa_pipe_softpipe
  libmesa_winsys_sw_android
  libmesa_gallium

Reviewed-by: Chad Versace <chad@chad-versace.us>
---
 Android.mk | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/Android.mk b/Android.mk
index f585575097a..03acbaf2f1c 100644
--- a/Android.mk
+++ b/Android.mk
@@ -92,6 +92,43 @@ LOCAL_SRC_FILES :=
 LOCAL_CFLAGS :=
 LOCAL_C_INCLUDES :=
 
+LOCAL_STATIC_LIBRARIES :=
+LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_egl
+
+LOCAL_SHARED_LIBRARIES := \
+	libglapi \
+	libdrm \
+	libdl \
+	libhardware \
+	liblog \
+	libcutils
+
+ifeq ($(strip $(MESA_BUILD_GALLIUM)),true)
+
+gallium_DRIVERS :=
+
+# swrast
+gallium_DRIVERS += libmesa_pipe_softpipe libmesa_winsys_sw_android
+
+#
+# Notes about the order here:
+#
+#  * libmesa_st_egl depends on libmesa_winsys_sw_android in $(gallium_DRIVERS)
+#  * libmesa_st_mesa depends on libmesa_glsl
+#  * libmesa_glsl depends on libmesa_glsl_utils
+#
+LOCAL_STATIC_LIBRARIES := \
+	libmesa_egl_gallium \
+	libmesa_st_egl \
+	$(gallium_DRIVERS) \
+	libmesa_st_mesa \
+	libmesa_glsl \
+	libmesa_glsl_utils \
+	libmesa_gallium \
+	$(LOCAL_STATIC_LIBRARIES)
+
+endif # MESA_BUILD_GALLIUM
+
 LOCAL_MODULE := libGLES_mesa
 LOCAL_MODULE_PATH := $(TARGET_OUT_SHARED_LIBRARIES)/egl
 

From f496d8b86da63f43239457a2f44920401d872bd6 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Thu, 4 Aug 2011 17:50:51 +0900
Subject: [PATCH 495/600] winsys/sw/android: set bo usage correctly

Since this is the software path, set GRALLOC_USAGE_SW_WRITE_OFTEN when
PIPE_BIND_RENDER_TARGET, and set GRALLOC_USAGE_SW_READ_OFTEN when
PIPE_BIND_SAMPLER_VIEW.
---
 src/gallium/winsys/sw/android/android_sw_winsys.cpp | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/src/gallium/winsys/sw/android/android_sw_winsys.cpp b/src/gallium/winsys/sw/android/android_sw_winsys.cpp
index 49d8aa9e19f..6ea48b25c39 100644
--- a/src/gallium/winsys/sw/android/android_sw_winsys.cpp
+++ b/src/gallium/winsys/sw/android/android_sw_winsys.cpp
@@ -158,17 +158,10 @@ android_displaytarget_from_handle(struct sw_winsys *ws,
    adt->width = templ->width0;
    adt->height = templ->height0;
 
-   if (templ->usage & PIPE_BIND_RENDER_TARGET)
-      adt->usage |= GRALLOC_USAGE_HW_RENDER;
-   if (templ->usage & PIPE_BIND_SAMPLER_VIEW)
-      adt->usage |= GRALLOC_USAGE_HW_TEXTURE;
-   if (templ->usage & PIPE_BIND_SCANOUT)
-      adt->usage |= GRALLOC_USAGE_HW_FB;
-
-   if (templ->usage & PIPE_BIND_TRANSFER_READ)
-      adt->usage |= GRALLOC_USAGE_SW_READ_OFTEN;
-   if (templ->usage & PIPE_BIND_TRANSFER_WRITE)
+   if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_TRANSFER_WRITE))
       adt->usage |= GRALLOC_USAGE_SW_WRITE_OFTEN;
+   if (templ->bind & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_TRANSFER_READ))
+      adt->usage |= GRALLOC_USAGE_SW_READ_OFTEN;
 
    if (stride)
       *stride = adt->stride;

From 9650483acded249d766441dd37db5af9b9b1e245 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Sun, 31 Jul 2011 12:08:24 +0900
Subject: [PATCH 496/600] winsys/sw/android: use HAL formats

Native buffers use HAL formats, not UI formats.
---
 .../winsys/sw/android/android_sw_winsys.cpp   | 27 +++++++------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/src/gallium/winsys/sw/android/android_sw_winsys.cpp b/src/gallium/winsys/sw/android/android_sw_winsys.cpp
index 6ea48b25c39..02faf1e0cca 100644
--- a/src/gallium/winsys/sw/android/android_sw_winsys.cpp
+++ b/src/gallium/winsys/sw/android/android_sw_winsys.cpp
@@ -32,8 +32,9 @@
 #include "util/u_format.h"
 #include "state_tracker/sw_winsys.h"
 
-#include <ui/PixelFormat.h>
+#include <utils/Errors.h>
 #include <private/ui/sw_gralloc_handle.h>
+
 #include <hardware/gralloc.h>
 
 #include "android_sw_winsys.h"
@@ -183,39 +184,29 @@ android_is_displaytarget_format_supported(struct sw_winsys *ws,
                                           enum pipe_format format)
 {
    struct android_sw_winsys *droid = android_sw_winsys(ws);
-   int fmt;
+   int fmt = -1;
 
    switch (format) {
    case PIPE_FORMAT_R8G8B8A8_UNORM:
-      fmt = PIXEL_FORMAT_RGBA_8888;
+      fmt = HAL_PIXEL_FORMAT_RGBA_8888;
       break;
    case PIPE_FORMAT_R8G8B8X8_UNORM:
-      fmt = PIXEL_FORMAT_RGBX_8888;
+      fmt = HAL_PIXEL_FORMAT_RGBX_8888;
       break;
    case PIPE_FORMAT_R8G8B8_UNORM:
-      fmt = PIXEL_FORMAT_RGB_888;
+      fmt = HAL_PIXEL_FORMAT_RGB_888;
       break;
    case PIPE_FORMAT_B5G6R5_UNORM:
-      fmt = PIXEL_FORMAT_RGB_565;
+      fmt = HAL_PIXEL_FORMAT_RGB_565;
       break;
    case PIPE_FORMAT_B8G8R8A8_UNORM:
-      fmt = PIXEL_FORMAT_BGRA_8888;
-      break;
-   case PIPE_FORMAT_A8_UNORM:
-      fmt = PIXEL_FORMAT_A_8;
-      break;
-   case PIPE_FORMAT_L8_UNORM:
-      fmt = PIXEL_FORMAT_L_8;
-      break;
-   case PIPE_FORMAT_L8A8_UNORM:
-      fmt = PIXEL_FORMAT_LA_88;
+      fmt = HAL_PIXEL_FORMAT_BGRA_8888;
       break;
    default:
-      fmt = PIXEL_FORMAT_NONE;
       break;
    }
 
-   return (fmt != PIXEL_FORMAT_NONE);
+   return (fmt != -1);
 }
 
 static void

From 8ccafbbbcc797dc15daed883b09250165ab980b3 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Sat, 30 Jul 2011 10:45:20 +0900
Subject: [PATCH 497/600] st/egl: use HAL formats for Android backend

Native buffers use HAL formats, not UI formats.
---
 .../egl/android/native_android.cpp            | 41 +++++++------------
 1 file changed, 15 insertions(+), 26 deletions(-)

diff --git a/src/gallium/state_trackers/egl/android/native_android.cpp b/src/gallium/state_trackers/egl/android/native_android.cpp
index 5c4584b2780..a6e0d254ef2 100644
--- a/src/gallium/state_trackers/egl/android/native_android.cpp
+++ b/src/gallium/state_trackers/egl/android/native_android.cpp
@@ -27,7 +27,7 @@
 #define LOG_TAG "MESA-EGL"
 #include <cutils/log.h>
 #include <cutils/properties.h>
-#include <ui/PixelFormat.h>
+#include <hardware/gralloc.h>
 #include <ui/android_native_buffer.h>
 
 extern "C" {
@@ -97,36 +97,26 @@ get_pipe_format(int native)
 {
    enum pipe_format fmt;
 
-   /* see libpixelflinger/format.cpp */
    switch (native) {
-   case PIXEL_FORMAT_RGBA_8888:
+   case HAL_PIXEL_FORMAT_RGBA_8888:
       fmt = PIPE_FORMAT_R8G8B8A8_UNORM;
       break;
-   case PIXEL_FORMAT_RGBX_8888:
+   case HAL_PIXEL_FORMAT_RGBX_8888:
       fmt = PIPE_FORMAT_R8G8B8X8_UNORM;
       break;
-   case PIXEL_FORMAT_RGB_888:
+   case HAL_PIXEL_FORMAT_RGB_888:
       fmt = PIPE_FORMAT_R8G8B8_UNORM;
       break;
-   case PIXEL_FORMAT_RGB_565:
+   case HAL_PIXEL_FORMAT_RGB_565:
       fmt = PIPE_FORMAT_B5G6R5_UNORM;
       break;
-   case PIXEL_FORMAT_BGRA_8888:
+   case HAL_PIXEL_FORMAT_BGRA_8888:
       fmt = PIPE_FORMAT_B8G8R8A8_UNORM;
       break;
-   case PIXEL_FORMAT_A_8:
-      fmt = PIPE_FORMAT_A8_UNORM;
-      break;
-   case PIXEL_FORMAT_L_8:
-      fmt = PIPE_FORMAT_L8_UNORM;
-      break;
-   case PIXEL_FORMAT_LA_88:
-      fmt = PIPE_FORMAT_L8A8_UNORM;
-      break;
-   case PIXEL_FORMAT_NONE:
-   case PIXEL_FORMAT_RGBA_5551:
-   case PIXEL_FORMAT_RGBA_4444:
-   case PIXEL_FORMAT_RGB_332:
+   case HAL_PIXEL_FORMAT_RGBA_5551:
+      /* fmt = PIPE_FORMAT_A1B5G5R5_UNORM; */
+   case HAL_PIXEL_FORMAT_RGBA_4444:
+      /* fmt = PIPE_FORMAT_A4B4G4R4_UNORM; */
    default:
       LOGE("unsupported native format 0x%x", native);
       fmt = PIPE_FORMAT_NONE;
@@ -443,12 +433,11 @@ android_display_init_configs(struct native_display *ndpy)
 {
    struct android_display *adpy = android_display(ndpy);
    const int native_formats[] = {
-      PIXEL_FORMAT_RGBA_8888,
-      PIXEL_FORMAT_RGBX_8888,
-      PIXEL_FORMAT_RGB_888,
-      PIXEL_FORMAT_RGB_565,
-      PIXEL_FORMAT_BGRA_8888,
-      PIXEL_FORMAT_A_8
+      HAL_PIXEL_FORMAT_RGBA_8888,
+      HAL_PIXEL_FORMAT_RGBX_8888,
+      HAL_PIXEL_FORMAT_RGB_888,
+      HAL_PIXEL_FORMAT_RGB_565,
+      HAL_PIXEL_FORMAT_BGRA_8888,
    };
    int i;
 

From 62c7c2fca423020dc634cad838f53751b9835654 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Wed, 20 Jul 2011 18:20:20 +0800
Subject: [PATCH 498/600] st/egl: swapping without a buffer is not an error

This fixes Kwaak3.
---
 src/gallium/state_trackers/egl/android/native_android.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/egl/android/native_android.cpp b/src/gallium/state_trackers/egl/android/native_android.cpp
index a6e0d254ef2..ccb71efa030 100644
--- a/src/gallium/state_trackers/egl/android/native_android.cpp
+++ b/src/gallium/state_trackers/egl/android/native_android.cpp
@@ -296,7 +296,7 @@ android_surface_swap_buffers(struct native_surface *nsurf)
    struct android_display *adpy = asurf->adpy;
 
    if (!asurf->buf)
-      return FALSE;
+      return TRUE;
 
    android_surface_enqueue_buffer(&asurf->base);
 

From 4c222ff4fea46ee5b740575df4b256fa2064984f Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Sun, 31 Jul 2011 10:49:52 +0900
Subject: [PATCH 499/600] st/egl: improve buffer cache for Android

There may be more than two back buffers.  Clean up and prepare the
buffer cache for that.
---
 .../egl/android/native_android.cpp            | 104 ++++++++++++------
 1 file changed, 73 insertions(+), 31 deletions(-)

diff --git a/src/gallium/state_trackers/egl/android/native_android.cpp b/src/gallium/state_trackers/egl/android/native_android.cpp
index ccb71efa030..b9f12dadc41 100644
--- a/src/gallium/state_trackers/egl/android/native_android.cpp
+++ b/src/gallium/state_trackers/egl/android/native_android.cpp
@@ -63,7 +63,12 @@ struct android_surface {
    android_native_buffer_t *buf;
    struct pipe_resource *res;
 
-   /* cache the current front and back resources */
+   /* cache the current back buffers */
+   struct {
+      int width;
+      int height;
+      int format;
+   } cache_key;
    void *cache_handles[2];
    struct pipe_resource *cache_resources[2];
 };
@@ -194,32 +199,42 @@ import_buffer(struct android_display *adpy, const struct pipe_resource *templ,
    return res;
 }
 
-/**
- * Dequeue the next back buffer for rendering.
- */
-static boolean
-android_surface_dequeue_buffer(struct native_surface *nsurf)
+static void
+android_surface_clear_cache(struct native_surface *nsurf)
+{
+   struct android_surface *asurf = android_surface(nsurf);
+   int i;
+
+   for (i = 0; i < Elements(asurf->cache_handles); i++) {
+      asurf->cache_handles[i] = NULL;
+      pipe_resource_reference(&asurf->cache_resources[i], NULL);
+   }
+
+   memset(&asurf->cache_key, 0, sizeof(asurf->cache_key));
+}
+
+static struct pipe_resource *
+android_surface_add_cache(struct native_surface *nsurf,
+                          struct android_native_buffer_t *abuf)
 {
    struct android_surface *asurf = android_surface(nsurf);
    void *handle;
    int idx;
 
-   if (asurf->win->dequeueBuffer(asurf->win, &asurf->buf) != NO_ERROR) {
-      LOGE("failed to dequeue window %p", asurf->win);
-      return FALSE;
-   }
-
-   asurf->buf->common.incRef(&asurf->buf->common);
-   asurf->win->lockBuffer(asurf->win, asurf->buf);
+   /* how about abuf->usage? */
+   if (asurf->cache_key.width != abuf->width ||
+       asurf->cache_key.height != abuf->height ||
+       asurf->cache_key.format != abuf->format)
+      android_surface_clear_cache(&asurf->base);
 
    if (asurf->adpy->use_drm)
-      handle = (void *) get_handle_name(asurf->buf->handle);
+      handle = (void *) get_handle_name(abuf->handle);
    else
-      handle = (void *) asurf->buf->handle;
+      handle = (void *) abuf->handle;
    /* NULL is invalid */
    if (!handle) {
-      LOGE("window %p returned an invalid buffer", asurf->win);
-      return TRUE;
+      LOGE("invalid buffer native buffer %p", abuf);
+      return NULL;
    }
 
    /* find the slot to use */
@@ -228,15 +243,18 @@ android_surface_dequeue_buffer(struct native_surface *nsurf)
          break;
    }
    if (idx == Elements(asurf->cache_handles)) {
-      /* buffer reallocated; clear the cache */
-      for (idx = 0; idx < Elements(asurf->cache_handles); idx++) {
-         asurf->cache_handles[idx] = 0;
-         pipe_resource_reference(&asurf->cache_resources[idx], NULL);
-      }
+      LOGW("cache full: buf %p, width %d, height %d, format %d, usage 0x%x",
+            abuf, abuf->width, abuf->height, abuf->format, abuf->usage);
+      android_surface_clear_cache(&asurf->base);
       idx = 0;
    }
 
-   /* update the cache */
+   if (idx == 0) {
+      asurf->cache_key.width = abuf->width;
+      asurf->cache_key.height = abuf->height;
+      asurf->cache_key.format = abuf->format;
+   }
+
    if (!asurf->cache_handles[idx]) {
       struct pipe_resource templ;
 
@@ -244,17 +262,18 @@ android_surface_dequeue_buffer(struct native_surface *nsurf)
 
       memset(&templ, 0, sizeof(templ));
       templ.target = PIPE_TEXTURE_2D;
-      templ.last_level = 0;
-      templ.width0 = asurf->buf->width;
-      templ.height0 = asurf->buf->height;
-      templ.depth0 = 1;
+      templ.format = get_pipe_format(asurf->buf->format);
       templ.bind = PIPE_BIND_RENDER_TARGET;
       if (!asurf->adpy->use_drm) {
          templ.bind |= PIPE_BIND_TRANSFER_WRITE |
                        PIPE_BIND_TRANSFER_READ;
       }
 
-      templ.format = get_pipe_format(asurf->buf->format);
+      templ.width0 = asurf->buf->width;
+      templ.height0 = asurf->buf->height;
+      templ.depth0 = 1;
+      templ.array_size = 1;
+
       if (templ.format != PIPE_FORMAT_NONE) {
          asurf->cache_resources[idx] =
             import_buffer(asurf->adpy, &templ, asurf->buf);
@@ -266,7 +285,31 @@ android_surface_dequeue_buffer(struct native_surface *nsurf)
       asurf->cache_handles[idx] = handle;
    }
 
-   pipe_resource_reference(&asurf->res, asurf->cache_resources[idx]);
+   return asurf->cache_resources[idx];
+}
+
+/**
+ * Dequeue the next back buffer for rendering.
+ */
+static boolean
+android_surface_dequeue_buffer(struct native_surface *nsurf)
+{
+   struct android_surface *asurf = android_surface(nsurf);
+   struct pipe_resource *res;
+
+   if (asurf->win->dequeueBuffer(asurf->win, &asurf->buf) != NO_ERROR) {
+      LOGE("failed to dequeue window %p", asurf->win);
+      return FALSE;
+   }
+
+   asurf->buf->common.incRef(&asurf->buf->common);
+   asurf->win->lockBuffer(asurf->win, asurf->buf);
+
+   res = android_surface_add_cache(&asurf->base, asurf->buf);
+   if (!res)
+      return FALSE;
+
+   pipe_resource_reference(&asurf->res, res);
 
    return TRUE;
 }
@@ -368,8 +411,7 @@ android_surface_destroy(struct native_surface *nsurf)
    if (asurf->buf)
       android_surface_enqueue_buffer(&asurf->base);
 
-   for (i = 0; i < Elements(asurf->cache_handles); i++)
-      pipe_resource_reference(&asurf->cache_resources[i], NULL);
+   android_surface_clear_cache(&asurf->base);
 
    asurf->win->common.decRef(&asurf->win->common);
 

From 5ce2dc692f9ffe801d96f45e5be972d2fe1855f8 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Sun, 31 Jul 2011 11:16:53 +0900
Subject: [PATCH 500/600] st/egl: add buffer preserving support to Android

Use a staging color buffer when buffer preserving is enabled.
---
 .../egl/android/native_android.cpp            | 71 +++++++++++++++++--
 1 file changed, 67 insertions(+), 4 deletions(-)

diff --git a/src/gallium/state_trackers/egl/android/native_android.cpp b/src/gallium/state_trackers/egl/android/native_android.cpp
index b9f12dadc41..2d7ae8b58bd 100644
--- a/src/gallium/state_trackers/egl/android/native_android.cpp
+++ b/src/gallium/state_trackers/egl/android/native_android.cpp
@@ -37,6 +37,7 @@ extern "C" {
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
+#include "util/u_box.h"
 #include "common/native.h"
 #include "common/native_helper.h"
 #include "android/android_sw_winsys.h"
@@ -59,9 +60,12 @@ struct android_surface {
    struct android_display *adpy;
    android_native_window_t *win;
 
+   /* staging color buffer for when buffer preserving is enabled */
+   struct pipe_resource *color_res;
+
    uint stamp;
    android_native_buffer_t *buf;
-   struct pipe_resource *res;
+   struct pipe_resource *buf_res;
 
    /* cache the current back buffers */
    struct {
@@ -309,7 +313,7 @@ android_surface_dequeue_buffer(struct native_surface *nsurf)
    if (!res)
       return FALSE;
 
-   pipe_resource_reference(&asurf->res, res);
+   pipe_resource_reference(&asurf->buf_res, res);
 
    return TRUE;
 }
@@ -322,7 +326,7 @@ android_surface_enqueue_buffer(struct native_surface *nsurf)
 {
    struct android_surface *asurf = android_surface(nsurf);
 
-   pipe_resource_reference(&asurf->res, NULL);
+   pipe_resource_reference(&asurf->buf_res, NULL);
 
    asurf->win->queueBuffer(asurf->win, asurf->buf);
 
@@ -350,17 +354,63 @@ android_surface_swap_buffers(struct native_surface *nsurf)
    return TRUE;
 }
 
+static void
+copy_resources(struct native_display *ndpy,
+               struct pipe_resource *src,
+               struct pipe_resource *dst)
+{
+   struct pipe_context *pipe;
+   struct pipe_box box;
+
+   pipe = ndpy_get_copy_context(ndpy);
+   if (!pipe)
+      return;
+
+   u_box_origin_2d(src->width0, src->height0, &box);
+   pipe->resource_copy_region(pipe, dst, 0, 0, 0, 0, src, 0, &box);
+   pipe->flush(pipe, NULL);
+}
+
 static boolean
 android_surface_present(struct native_surface *nsurf,
                         enum native_attachment natt,
                         boolean preserve,
                         uint swap_interval)
 {
+   struct android_surface *asurf = android_surface(nsurf);
+   struct android_display *adpy = asurf->adpy;
    boolean ret;
 
    if (swap_interval || natt != NATIVE_ATTACHMENT_BACK_LEFT)
       return FALSE;
 
+   /* we always render to color_res first when it exists */
+   if (asurf->color_res) {
+      copy_resources(&adpy->base, asurf->color_res, asurf->buf_res);
+      if (!preserve)
+         pipe_resource_reference(&asurf->color_res, NULL);
+   }
+   else if (preserve) {
+      struct pipe_resource templ;
+
+      memset(&templ, 0, sizeof(templ));
+      templ.target = asurf->buf_res->target;
+      templ.format = asurf->buf_res->format;
+      templ.bind = PIPE_BIND_RENDER_TARGET;
+      templ.width0 = asurf->buf_res->width0;
+      templ.height0 = asurf->buf_res->height0;
+      templ.depth0 = asurf->buf_res->depth0;
+      templ.array_size = asurf->buf_res->array_size;
+
+      asurf->color_res =
+         adpy->base.screen->resource_create(adpy->base.screen, &templ);
+      if (!asurf->color_res)
+         return FALSE;
+
+      /* preserve the contents */
+      copy_resources(&adpy->base, asurf->buf_res, asurf->color_res);
+   }
+
    return android_surface_swap_buffers(nsurf);
 }
 
@@ -375,6 +425,13 @@ android_surface_validate(struct native_surface *nsurf, uint attachment_mask,
    if (!asurf->buf) {
       if (!android_surface_dequeue_buffer(&asurf->base))
          return FALSE;
+
+      /* color_res must be compatible with buf_res */
+      if (asurf->color_res &&
+          (asurf->color_res->format != asurf->buf_res->format ||
+           asurf->color_res->width0 != asurf->buf_res->width0 ||
+           asurf->color_res->height0 != asurf->buf_res->height0))
+         pipe_resource_reference(&asurf->color_res, NULL);
    }
 
    if (textures) {
@@ -383,7 +440,8 @@ android_surface_validate(struct native_surface *nsurf, uint attachment_mask,
 
       if (native_attachment_mask_test(attachment_mask, att)) {
          textures[att] = NULL;
-         pipe_resource_reference(&textures[att], asurf->res);
+         pipe_resource_reference(&textures[att],
+               (asurf->color_res) ? asurf->color_res : asurf->buf_res);
       }
    }
 
@@ -408,6 +466,8 @@ android_surface_destroy(struct native_surface *nsurf)
    struct android_surface *asurf = android_surface(nsurf);
    int i;
 
+   pipe_resource_reference(&asurf->color_res, NULL);
+
    if (asurf->buf)
       android_surface_enqueue_buffer(&asurf->base);
 
@@ -625,6 +685,9 @@ android_display_get_param(struct native_display *ndpy,
    int val;
 
    switch (param) {
+   case NATIVE_PARAM_PRESERVE_BUFFER:
+      val = 1;
+      break;
    default:
       val = 0;
       break;

From b71a7a2f37ec3693b8f3a7f1acb0c59ed076f104 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Thu, 4 Aug 2011 11:49:42 +0900
Subject: [PATCH 501/600] st/egl: improve error logging

This helps diagnose problems in EGL initialization.
---
 .../state_trackers/egl/android/native_android.cpp      |  5 +++--
 src/gallium/targets/egl-static/egl.c                   | 10 +++++++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/gallium/state_trackers/egl/android/native_android.cpp b/src/gallium/state_trackers/egl/android/native_android.cpp
index 2d7ae8b58bd..9d2d51fd6d1 100644
--- a/src/gallium/state_trackers/egl/android/native_android.cpp
+++ b/src/gallium/state_trackers/egl/android/native_android.cpp
@@ -24,7 +24,7 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-#define LOG_TAG "MESA-EGL"
+#define LOG_TAG "EGL-GALLIUM"
 #include <cutils/log.h>
 #include <cutils/properties.h>
 #include <hardware/gralloc.h>
@@ -599,7 +599,8 @@ android_display_init_drm(struct native_display *ndpy)
       return TRUE;
    }
    else {
-      LOGE("failed to create DRM screen");
+      LOGW("failed to create DRM screen");
+      LOGW("will fall back to other EGL drivers if any");
       return FALSE;
    }
 }
diff --git a/src/gallium/targets/egl-static/egl.c b/src/gallium/targets/egl-static/egl.c
index 6e5f6f30b11..a7aee27238b 100644
--- a/src/gallium/targets/egl-static/egl.c
+++ b/src/gallium/targets/egl-static/egl.c
@@ -221,13 +221,21 @@ drm_fd_get_screen_name(int fd)
 static struct pipe_screen *
 create_drm_screen(const char *name, int fd)
 {
+   struct pipe_screen *screen;
+
    if (!name) {
       name = drm_fd_get_screen_name(fd);
       if (!name)
          return NULL;
    }
 
-   return egl_pipe_create_drm_screen(name, fd);
+   screen = egl_pipe_create_drm_screen(name, fd);
+   if (screen)
+      _eglLog(_EGL_INFO, "created a pipe screen for %s", name);
+   else
+      _eglLog(_EGL_WARNING, "failed to create a pipe screen for %s", name);
+
+   return screen;
 }
 
 static struct pipe_screen *

From 112e68c5039ff717848304f0c28a07c5b39c7f45 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Tue, 2 Aug 2011 09:49:27 +0900
Subject: [PATCH 502/600] st/egl: add a missing include

Reported by cwhuang.
---
 src/gallium/state_trackers/egl/android/native_android.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/state_trackers/egl/android/native_android.cpp b/src/gallium/state_trackers/egl/android/native_android.cpp
index 9d2d51fd6d1..211d6a2aeef 100644
--- a/src/gallium/state_trackers/egl/android/native_android.cpp
+++ b/src/gallium/state_trackers/egl/android/native_android.cpp
@@ -28,6 +28,7 @@
 #include <cutils/log.h>
 #include <cutils/properties.h>
 #include <hardware/gralloc.h>
+#include <utils/Errors.h>
 #include <ui/android_native_buffer.h>
 
 extern "C" {

From 8c31bc704826d46cad65c4d65b4b70de7144205a Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Wed, 17 Aug 2011 10:01:30 -0500
Subject: [PATCH 503/600] glsl_to_tgsi: implement ir_unop_logic_not using 1-x

Since our logic values are 0.0 (false) and 1.0 (true), 1.0 - x accurately
implements logical not.

This is a port of commit 6ad08989d7c1 to glsl_to_tgsi.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 886a1776210..73a647efe34 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1336,7 +1336,17 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
 
    switch (ir->operation) {
    case ir_unop_logic_not:
-      emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0));
+      if (result_dst.type != GLSL_TYPE_FLOAT)
+         emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0));
+      else {
+         /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
+          * older GPUs implement SEQ using multiple instructions (i915 uses two
+          * SGE instructions and a MUL instruction).  Since our logic values are
+          * 0.0 and 1.0, 1-x also implements !x.
+          */
+         op[0].negate = ~op[0].negate;
+         emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
+      }
       break;
    case ir_unop_neg:
       assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT);

From 691cc0e3a8716a2cdb7271765cd7d4c7465066eb Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Wed, 17 Aug 2011 14:35:35 -0500
Subject: [PATCH 504/600] glsl_to_tgsi: implement ir_binop_logic_or using an
 add w/saturate or add w/SLT

Logical-or is implemented using addition (followed by clamping to [0,1]) on
values of 0.0 and 1.0. Replacing the logical-or operators with addition gives
a + b which has a result on the range [0, 2].

Previously a SNE instruction was used to clamp the resulting logic value to
[0,1]. In a fragment shader, using a saturate on the add has the same effect.
Adding the saturate to the add is free, so (at least) one instruction is
saved. In a vertex shader, using an SLT on the negation of the add result has
the same effect. Many older shader architectures do not support the SNE
instruction. It must be emulated using two SLT instructions and an ADD. On
these architectures, the single SLT saves two instructions.

Note that SNE is still used when integers are used for boolean values, since
there is no such thing as an integer saturate, and older shader architectures
without SNE don't support integers.

This is a port of commit 41f8ffe5e07c to glsl_to_tgsi with integer support
added.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 31 +++++++++++++++++++---
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 73a647efe34..5f4aef16b66 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1493,11 +1493,34 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
       break;
 
-   case ir_binop_logic_or:
-      /* This could be a saturated add and skip the SNE. */
-      emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
-      emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+   case ir_binop_logic_or: {
+      /* After the addition, the value will be an integer on the
+       * range [0,2].  Zero stays zero, and positive values become 1.0.
+       */
+      glsl_to_tgsi_instruction *add =
+         emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
+      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+          result_dst.type == GLSL_TYPE_FLOAT) {
+         /* The clamping to [0,1] can be done for free in the fragment
+          * shader with a saturate if floats are being used as boolean values.
+          */
+         add->saturate = true;
+      } else if (result_dst.type == GLSL_TYPE_FLOAT) {
+         /* Negating the result of the addition gives values on the range
+          * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
+          * is achieved using SLT.
+          */
+         st_src_reg slt_src = result_src;
+         slt_src.negate = ~slt_src.negate;
+         emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+      } else {
+         /* Use an SNE on the result of the addition.  Zero stays zero,
+          * 1 stays 1, and 2 becomes 1.
+          */
+         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+      }
       break;
+   }
 
    case ir_binop_logic_and:
       /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */

From c15eb5569bf76c5dc41327017b92a5d960207b97 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Wed, 17 Aug 2011 20:34:19 -0500
Subject: [PATCH 505/600] glsl_to_tgsi: make glsl_to_tgsi_visitor::emit_dp
 return the instruction

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 5f4aef16b66..62127afadde 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -373,11 +373,11 @@ public:
    /**
     * Emit the correct dot-product instruction for the type of arguments
     */
-   void emit_dp(ir_instruction *ir,
-                st_dst_reg dst,
-                st_src_reg src0,
-                st_src_reg src1,
-                unsigned elements);
+   glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
+                                     st_dst_reg dst,
+                                     st_src_reg src0,
+                                     st_src_reg src1,
+                                     unsigned elements);
 
    void emit_scalar(ir_instruction *ir, unsigned op,
         	    st_dst_reg dst, st_src_reg src0);
@@ -642,7 +642,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
    return op;
 }
 
-void
+glsl_to_tgsi_instruction *
 glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
         		    st_dst_reg dst, st_src_reg src0, st_src_reg src1,
         		    unsigned elements)
@@ -651,7 +651,7 @@ glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
       TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
    };
 
-   emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
+   return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
 }
 
 /**

From a43f68810a347f3e952a0bc401be6edb91e1baea Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sat, 20 Aug 2011 13:26:12 -0500
Subject: [PATCH 506/600] glsl_to_tgsi: implement ir_unop_any using DP4
 w/saturate or DP4 w/SLT

This is a port of commit 92ca560d68e8 to glsl_to_tgsi, with integer support
added.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 32 +++++++++++++++++++---
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 62127afadde..f7d79e9f50c 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1482,12 +1482,36 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       }
       break;
 
-   case ir_unop_any:
+   case ir_unop_any: {
       assert(ir->operands[0]->type->is_vector());
-      emit_dp(ir, result_dst, op[0], op[0],
-              ir->operands[0]->type->vector_elements);
-      emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+
+      /* After the dot-product, the value will be an integer on the
+       * range [0,4].  Zero stays zero, and positive values become 1.0.
+       */
+      glsl_to_tgsi_instruction *const dp =
+         emit_dp(ir, result_dst, op[0], op[0],
+                 ir->operands[0]->type->vector_elements);
+      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+          result_dst.type == GLSL_TYPE_FLOAT) {
+	      /* The clamping to [0,1] can be done for free in the fragment
+	       * shader with a saturate.
+	       */
+	      dp->saturate = true;
+      } else if (result_dst.type == GLSL_TYPE_FLOAT) {
+	      /* Negating the result of the dot-product gives values on the range
+	       * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
+	       * is achieved using SLT.
+	       */
+	      st_src_reg slt_src = result_src;
+	      slt_src.negate = ~slt_src.negate;
+	      emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+      }
+      else {
+         /* Use SNE 0 if integers are being used as boolean values. */
+         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+      }
       break;
+   }
 
    case ir_binop_logic_xor:
       emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);

From f3dce133f0422c42ca61f07f488237107efc30e6 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sat, 20 Aug 2011 13:56:06 -0500
Subject: [PATCH 507/600] glsl_to_tgsi: implement ir_binop_any_nequal using DP4
 w/saturate or DP4 w/SLT

Implement the any() part of the operation the same way regular ir_unop_any
is implemented.

This is a port of commit e7bf096e8b04 to glsl_to_tgsi, with added integer
support.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 25 ++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index f7d79e9f50c..b238c267c81 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1475,8 +1475,29 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
                glsl_type::vec4_type);
          assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
          emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
-         emit_dp(ir, result_dst, temp, temp, vector_elements);
-         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+
+         /* After the dot-product, the value will be an integer on the
+          * range [0,4].  Zero stays zero, and positive values become 1.0.
+          */
+         glsl_to_tgsi_instruction *const dp =
+               emit_dp(ir, result_dst, temp, temp, vector_elements);
+         if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+             result_dst.type == GLSL_TYPE_FLOAT) {
+            /* The clamping to [0,1] can be done for free in the fragment
+             * shader with a saturate.
+             */
+            dp->saturate = true;
+         } else if (result_dst.type == GLSL_TYPE_FLOAT) {
+            /* Negating the result of the dot-product gives values on the range
+             * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
+             * achieved using SLT.
+             */
+            st_src_reg slt_src = result_src;
+            slt_src.negate = ~slt_src.negate;
+            emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+         } else {
+            emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+         }
       } else {
          emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
       }

From 9098953ee6e0699e13e35183c817ecf40363d538 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sat, 20 Aug 2011 14:15:03 -0500
Subject: [PATCH 508/600] glsl_to_tgsi: implement ir_binop_all_equal using DP4
 w/SGE

This is a port of commit ba01df11c4d0 to glsl_to_tgsi with integer support
added.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index b238c267c81..b211fc680a3 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1460,8 +1460,26 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
                glsl_type::vec4_type);
          assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
          emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+         
+         /* After the dot-product, the value will be an integer on the
+          * range [0,4].  Zero becomes 1.0, and positive values become zero.
+          */
          emit_dp(ir, result_dst, temp, temp, vector_elements);
-         emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0));
+         
+         if (result_dst.type == GLSL_TYPE_FLOAT) {
+            /* Negating the result of the dot-product gives values on the range
+             * [-4, 0].  Zero becomes 1.0, and negative values become zero.
+             * This is achieved using SGE.
+             */
+            st_src_reg sge_src = result_src;
+            sge_src.negate = ~sge_src.negate;
+            emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
+         } else {
+            /* The TGSI negate flag doesn't work for integers, so use SEQ 0
+             * instead.
+             */
+            emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_int(0));
+         }
       } else {
          emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
       }

From c721d7b7bc70503d2ebb6c742be96371b68bd152 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sat, 20 Aug 2011 14:17:52 -0500
Subject: [PATCH 509/600] glsl_to_tgsi: fix typo

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index b211fc680a3..2885630c7db 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1514,7 +1514,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
             slt_src.negate = ~slt_src.negate;
             emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
          } else {
-            emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+            emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
          }
       } else {
          emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);

From 5379a70d3fabd9cf92a615647f81289d33ae9468 Mon Sep 17 00:00:00 2001
From: Bryan Cain <bryancain3@gmail.com>
Date: Sat, 20 Aug 2011 14:43:25 -0500
Subject: [PATCH 510/600] glsl_to_tgsi: emit a MAD(b, -a, b) for !a && b

This is a port of commit ff2cfb8989cd to glsl_to_tgsi.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 52 ++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 2885630c7db..a1f56d3d78a 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -392,6 +392,8 @@ public:
 
    GLboolean try_emit_mad(ir_expression *ir,
         		  int mul_operand);
+   bool try_emit_mad_for_and_not(ir_expression *ir,
+              int mul_operand);
    GLboolean try_emit_sat(ir_expression *ir);
 
    void emit_swz(ir_expression *ir);
@@ -1210,6 +1212,46 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
    return true;
 }
 
+/**
+ * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
+ *
+ * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
+ * implemented using multiplication, and logical-or is implemented using
+ * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
+ * As result, the logical expression (a & !b) can be rewritten as:
+ *
+ *     - a * !b
+ *     - a * (1 - b)
+ *     - (a * 1) - (a * b)
+ *     - a + -(a * b)
+ *     - a + (a * -b)
+ *
+ * This final expression can be implemented as a single MAD(a, -b, a)
+ * instruction.
+ */
+bool
+glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
+{
+   const int other_operand = 1 - try_operand;
+   st_src_reg a, b;
+
+   ir_expression *expr = ir->operands[try_operand]->as_expression();
+   if (!expr || expr->operation != ir_unop_logic_not)
+      return false;
+
+   ir->operands[other_operand]->accept(this);
+   a = this->result;
+   expr->operands[0]->accept(this);
+   b = this->result;
+
+   b.negate = ~b.negate;
+
+   this->result = get_temp(ir->type);
+   emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
+
+   return true;
+}
+
 GLboolean
 glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
 {
@@ -1291,6 +1333,16 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       if (try_emit_mad(ir, 0))
          return;
    }
+
+   /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
+    */
+   if (ir->operation == ir_binop_logic_and) {
+      if (try_emit_mad_for_and_not(ir, 1))
+	 return;
+      if (try_emit_mad_for_and_not(ir, 0))
+	 return;
+   }
+
    if (try_emit_sat(ir))
       return;
 

From 7a5d28908c03c5ce38da3f041d23bfd103a5becd Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Sun, 21 Aug 2011 23:06:39 -0700
Subject: [PATCH 511/600] glsl_to_tgsi: Fix a few more struct vs. class
 warnings.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index a1f56d3d78a..4b3e00c4242 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -866,7 +866,7 @@ glsl_to_tgsi_visitor::add_constant(gl_register_file file,
    }
 }
 
-struct st_src_reg
+st_src_reg
 glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
 {
    st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
@@ -878,7 +878,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
    return src;
 }
 
-struct st_src_reg
+st_src_reg
 glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
 {
    st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
@@ -892,7 +892,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
    return src;
 }
 
-struct st_src_reg
+st_src_reg
 glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
 {
    if (native_integers)
@@ -1035,7 +1035,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
          }
       }
 
-      struct variable_storage *storage;
+      variable_storage *storage;
       st_dst_reg dst;
       if (i == ir->num_state_slots) {
          /* We'll set the index later. */
@@ -4236,7 +4236,7 @@ translate_src(struct st_translate *t, const st_src_reg *src_reg)
 
 static void
 compile_tgsi_instruction(struct st_translate *t,
-                         const struct glsl_to_tgsi_instruction *inst)
+                         const glsl_to_tgsi_instruction *inst)
 {
    struct ureg_program *ureg = t->ureg;
    GLuint i;

From 69595283b64d1f01b33022c38468376ad8596ea7 Mon Sep 17 00:00:00 2001
From: Chad Versace <chad@chad-versace.us>
Date: Wed, 17 Aug 2011 17:35:07 -0700
Subject: [PATCH 512/600] intel: Abort when DRI2 separate stencil handshake
 fails

When intel_context requires separate stencil but the DRI2 separate stencil
handshake fails, then abort and emit an error instructing the user to
upgrade the DDX to 2.16.0.

CC: Eric Anholt <eric@anholt.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Chad Versace <chad@chad-versace.us>
---
 src/mesa/drivers/dri/intel/intel_context.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index fe8be082dfc..14342ef6246 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -1454,6 +1454,13 @@ intel_verify_dri2_has_hiz(struct intel_context *intel,
 	  * a combined depth/stencil buffer. Discard the hiz buffer too.
 	  */
 	 intel->intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_FALSE;
+	 if (intel->must_use_separate_stencil) {
+	    _mesa_problem(&intel->ctx,
+			  "intel_context requires separate stencil, but the "
+			  "DRIscreen does not support it. You may need to "
+			  "upgrade the Intel X driver to 2.16.0");
+	    abort();
+	 }
 
 	 /* 1. Discard depth and stencil renderbuffers. */
 	 _mesa_remove_renderbuffer(fb, BUFFER_DEPTH);

From 6c8ea1eed66e9da82fb2b49b1e7f6d7f6064dbc4 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 18 Aug 2011 13:52:28 -0700
Subject: [PATCH 513/600] glsl: Make ir_validate actually visit ir_if nodes.

There is no ir_hierarchical_visitor::visit(ir_if *) method, since ir_if
is not a leaf node.  Instead, there are visit_enter and visit_leave
methods.  Use visit_enter arbitrarily (either would work fine, though
visit_enter will catch errors sooner).

Found thanks to a warning emitted by Clang.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/ir_validate.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index b3ca72ef0c2..2d1c6097c57 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -59,7 +59,8 @@ public:
 
    virtual ir_visitor_status visit(ir_variable *v);
    virtual ir_visitor_status visit(ir_dereference_variable *ir);
-   virtual ir_visitor_status visit(ir_if *ir);
+
+   virtual ir_visitor_status visit_enter(ir_if *ir);
 
    virtual ir_visitor_status visit_leave(ir_loop *ir);
    virtual ir_visitor_status visit_enter(ir_function *ir);
@@ -102,7 +103,7 @@ ir_validate::visit(ir_dereference_variable *ir)
 }
 
 ir_visitor_status
-ir_validate::visit(ir_if *ir)
+ir_validate::visit_enter(ir_if *ir)
 {
    if (ir->condition->type != glsl_type::bool_type) {
       printf("ir_if condition %s type instead of bool.\n",

From 582b5d869c0f05814d4d567636a743d3fdddf431 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Thu, 18 Aug 2011 17:12:29 +0800
Subject: [PATCH 514/600] scons: add ParseSourceList method
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ParseSourceList() can be used to parse a source list file and returns
the source files defined in it.  It is supposed to be used like this

  # get the list of source files from C_SOURCES in Makefile.sources
  sources = env.ParseSourceList('Makefile.sources', 'C_SOURCES')

The syntax of a source list file is compatible with GNU Make.  This
effectively allows SConscript and Makefile to share the source lists.

Acked-by: José Fonseca <jfonseca@vmware.com>
Acked-by: Chad Versace <chad@chad-versace.us>
---
 scons/custom.py      |  36 +++++++++++++
 scons/source_list.py | 123 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 159 insertions(+)
 create mode 100644 scons/source_list.py

diff --git a/scons/custom.py b/scons/custom.py
index df7ac93bb00..b6d716cf437 100644
--- a/scons/custom.py
+++ b/scons/custom.py
@@ -42,6 +42,7 @@ import SCons.Scanner
 
 import fixes
 
+import source_list
 
 def quietCommandLines(env):
     # Quiet command lines
@@ -229,6 +230,40 @@ def createPkgConfigMethods(env):
     env.AddMethod(pkg_use_modules, 'PkgUseModules')
 
 
+def parse_source_list(env, filename, names=None):
+    # parse the source list file
+    parser = source_list.SourceListParser()
+    src = env.File(filename).srcnode()
+    sym_table = parser.parse(src.abspath)
+
+    if names:
+        if isinstance(names, basestring):
+            names = [names]
+
+        symbols = names
+    else:
+        symbols = sym_table.keys()
+
+    # convert the symbol table to source lists
+    src_lists = {}
+    for sym in symbols:
+        val = sym_table[sym]
+        src_lists[sym] = [f for f in val.split(' ') if f]
+
+    # if names are given, concatenate the lists
+    if names:
+        srcs = []
+        for name in names:
+            srcs.extend(src_lists[name])
+
+        return srcs
+    else:
+        return src_lists
+
+def createParseSourceListMethod(env):
+    env.AddMethod(parse_source_list, 'ParseSourceList')
+
+
 def generate(env):
     """Common environment generation code"""
 
@@ -240,6 +275,7 @@ def generate(env):
     createConvenienceLibBuilder(env)
     createCodeGenerateMethod(env)
     createPkgConfigMethods(env)
+    createParseSourceListMethod(env)
 
     # for debugging
     #print env.Dump()
diff --git a/scons/source_list.py b/scons/source_list.py
new file mode 100644
index 00000000000..fbd3ef7dc17
--- /dev/null
+++ b/scons/source_list.py
@@ -0,0 +1,123 @@
+"""Source List Parser
+
+The syntax of a source list file is a very small subset of GNU Make.  These
+features are supported
+
+ operators: +=, :=
+ line continuation
+ non-nested variable expansion
+ comment
+
+The goal is to allow Makefile's and SConscript's to share source listing.
+"""
+
+class SourceListParser(object):
+    def __init__(self):
+        self._reset()
+
+    def _reset(self, filename=None):
+        self.filename = filename
+
+        self.line_no = 1
+        self.line_cont = ''
+        self.symbol_table = {}
+
+    def _error(self, msg):
+        raise RuntimeError('%s:%d: %s' % (self.filename, self.line_no, msg))
+
+    def _next_dereference(self, val, cur):
+        """Locate the next $(...) in value."""
+        deref_pos = val.find('$', cur)
+        if deref_pos < 0:
+            return (-1, -1)
+        elif val[deref_pos + 1] != '(':
+            self._error('non-variable dereference')
+
+        deref_end = val.find(')', deref_pos + 2)
+        if deref_end < 0:
+            self._error('unterminated variable dereference')
+
+        return (deref_pos, deref_end + 1)
+
+    def _expand_value(self, val):
+        """Perform variable expansion."""
+        expanded = ''
+        cur = 0
+        while True:
+            deref_pos, deref_end = self._next_dereference(val, cur)
+            if deref_pos < 0:
+                expanded += val[cur:]
+                break
+
+            sym = val[(deref_pos + 2):(deref_end - 1)]
+            expanded += val[cur:deref_pos] + self.symbol_table[sym]
+            cur = deref_end
+
+        return expanded
+
+    def _parse_definition(self, line):
+        """Parse a variable definition line."""
+        op_pos = line.find('=')
+        op_end = op_pos + 1
+        if op_pos < 0:
+            self._error('not a variable definition')
+
+        if op_pos > 0 and line[op_pos - 1] in [':', '+']:
+            op_pos -= 1
+        else:
+            self._error('only := and += are supported')
+
+        # set op, sym, and val
+        op = line[op_pos:op_end]
+        sym = line[:op_pos].strip()
+        val = self._expand_value(line[op_end:].lstrip())
+
+        if op == ':=':
+            self.symbol_table[sym] = val
+        elif op == '+=':
+            self.symbol_table[sym] += ' ' + val
+
+    def _parse_line(self, line):
+        """Parse a source list line."""
+        # more lines to come
+        if line and line[-1] == '\\':
+            # spaces around "\\\n" are replaced by a single space
+            if self.line_cont:
+                self.line_cont += line[:-1].strip() + ' '
+            else:
+                self.line_cont = line[:-1].rstrip() + ' '
+            return 0
+
+        # combine with previous lines
+        if self.line_cont:
+            line = self.line_cont + line.lstrip()
+            self.line_cont = ''
+
+        if line:
+            begins_with_tab = (line[0] == '\t')
+
+            line = line.lstrip()
+            if line[0] != '#':
+                if begins_with_tab:
+                    self._error('recipe line not supported')
+                else:
+                    self._parse_definition(line)
+
+        return 1
+
+    def parse(self, filename):
+        """Parse a source list file."""
+        if self.filename != filename:
+            fp = open(filename)
+            lines = fp.read().splitlines()
+            fp.close()
+
+            try:
+                self._reset(filename)
+                for line in lines:
+                    self.line_no += self._parse_line(line)
+            except:
+                self._reset()
+                raise
+
+        return self.symbol_table

From dd250e6045dd290098b59af01b5e1b501535d45a Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Fri, 19 Aug 2011 11:05:37 +0800
Subject: [PATCH 515/600] auxiliary: share the source lists
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Factor out source lists from Makefile to Makefile.sources, and let
Makefile, SConscript, and Android.mk share it.

Note that files in $(GENERATED_SOURCES) are removed from $(C_SOURCES).

Acked-by: José Fonseca <jfonseca@vmware.com>
Acked-by: Chad Versace <chad@chad-versace.us>
---
 src/gallium/auxiliary/Android.mk       | 203 +------------------------
 src/gallium/auxiliary/Makefile         | 201 +-----------------------
 src/gallium/auxiliary/Makefile.sources | 193 +++++++++++++++++++++++
 src/gallium/auxiliary/SConscript       | 201 +-----------------------
 4 files changed, 207 insertions(+), 591 deletions(-)
 create mode 100644 src/gallium/auxiliary/Makefile.sources

diff --git a/src/gallium/auxiliary/Android.mk b/src/gallium/auxiliary/Android.mk
index e8390c1f93e..0c37dd31ab6 100644
--- a/src/gallium/auxiliary/Android.mk
+++ b/src/gallium/auxiliary/Android.mk
@@ -23,209 +23,12 @@
 
 LOCAL_PATH := $(call my-dir)
 
-# from Makefile
-C_SOURCES = \
-	cso_cache/cso_cache.c \
-	cso_cache/cso_context.c \
-	cso_cache/cso_hash.c \
-	draw/draw_context.c \
-	draw/draw_fs.c \
-	draw/draw_gs.c \
-	draw/draw_pipe.c \
-	draw/draw_pipe_aaline.c \
-	draw/draw_pipe_aapoint.c \
-	draw/draw_pipe_clip.c \
-	draw/draw_pipe_cull.c \
-	draw/draw_pipe_flatshade.c \
-	draw/draw_pipe_offset.c \
-	draw/draw_pipe_pstipple.c \
-	draw/draw_pipe_stipple.c \
-	draw/draw_pipe_twoside.c \
-	draw/draw_pipe_unfilled.c \
-	draw/draw_pipe_util.c \
-	draw/draw_pipe_validate.c \
-	draw/draw_pipe_vbuf.c \
-	draw/draw_pipe_wide_line.c \
-	draw/draw_pipe_wide_point.c \
-	draw/draw_pt.c \
-	draw/draw_pt_emit.c \
-	draw/draw_pt_fetch.c \
-	draw/draw_pt_fetch_emit.c \
-	draw/draw_pt_fetch_shade_emit.c \
-	draw/draw_pt_fetch_shade_pipeline.c \
-	draw/draw_pt_post_vs.c \
-	draw/draw_pt_so_emit.c \
-	draw/draw_pt_util.c \
-	draw/draw_pt_vsplit.c \
-	draw/draw_vertex.c \
-	draw/draw_vs.c \
-	draw/draw_vs_aos.c \
-	draw/draw_vs_aos_io.c \
-	draw/draw_vs_aos_machine.c \
-	draw/draw_vs_exec.c \
-	draw/draw_vs_ppc.c \
-	draw/draw_vs_sse.c \
-	draw/draw_vs_variant.c \
-	indices/u_indices_gen.c \
-	indices/u_unfilled_gen.c \
-	os/os_misc.c \
-	os/os_stream.c \
-	os/os_stream_log.c \
-	os/os_stream_null.c \
-	os/os_stream_stdc.c \
-	os/os_stream_str.c \
-	os/os_time.c \
-	pipebuffer/pb_buffer_fenced.c \
-	pipebuffer/pb_buffer_malloc.c \
-	pipebuffer/pb_bufmgr_alt.c \
-	pipebuffer/pb_bufmgr_cache.c \
-	pipebuffer/pb_bufmgr_debug.c \
-	pipebuffer/pb_bufmgr_mm.c \
-	pipebuffer/pb_bufmgr_ondemand.c \
-	pipebuffer/pb_bufmgr_pool.c \
-	pipebuffer/pb_bufmgr_slab.c \
-	pipebuffer/pb_validate.c \
-	rbug/rbug_connection.c \
-	rbug/rbug_context.c \
-	rbug/rbug_core.c \
-	rbug/rbug_demarshal.c \
-	rbug/rbug_texture.c \
-	rbug/rbug_shader.c \
-	rtasm/rtasm_cpu.c \
-	rtasm/rtasm_execmem.c \
-	rtasm/rtasm_ppc.c \
-	rtasm/rtasm_ppc_spe.c \
-	rtasm/rtasm_x86sse.c \
-	tgsi/tgsi_build.c \
-	tgsi/tgsi_dump.c \
-	tgsi/tgsi_exec.c \
-	tgsi/tgsi_info.c \
-	tgsi/tgsi_iterate.c \
-	tgsi/tgsi_parse.c \
-	tgsi/tgsi_ppc.c \
-	tgsi/tgsi_sanity.c \
-	tgsi/tgsi_scan.c \
-	tgsi/tgsi_sse2.c \
-	tgsi/tgsi_text.c \
-	tgsi/tgsi_transform.c \
-	tgsi/tgsi_ureg.c \
-	tgsi/tgsi_util.c \
-	translate/translate.c \
-	translate/translate_cache.c \
-	translate/translate_generic.c \
-	translate/translate_sse.c \
-	util/u_debug.c \
-	util/u_debug_describe.c \
-	util/u_debug_refcnt.c \
-	util/u_debug_stack.c \
-	util/u_debug_symbol.c \
-	util/u_dump_defines.c \
-	util/u_dump_state.c \
-	util/u_bitmask.c \
-	util/u_blit.c \
-	util/u_blitter.c \
-	util/u_cache.c \
-	util/u_caps.c \
-	util/u_cpu_detect.c \
-	util/u_dl.c \
-	util/u_draw.c \
-	util/u_draw_quad.c \
-	util/u_format.c \
-	util/u_format_other.c \
-	util/u_format_latc.c \
-	util/u_format_s3tc.c \
-	util/u_format_rgtc.c \
-	util/u_format_srgb.c \
-	util/u_format_table.c \
-	util/u_format_tests.c \
-	util/u_format_yuv.c \
-	util/u_format_zs.c \
-	util/u_framebuffer.c \
-	util/u_gen_mipmap.c \
-	util/u_half.c \
-	util/u_handle_table.c \
-	util/u_hash.c \
-	util/u_hash_table.c \
-	util/u_index_modify.c \
-	util/u_keymap.c \
-	util/u_linear.c \
-	util/u_linkage.c \
-	util/u_network.c \
-	util/u_math.c \
-	util/u_mm.c \
-	util/u_pstipple.c \
-	util/u_rect.c \
-	util/u_ringbuffer.c \
-	util/u_sampler.c \
-	util/u_simple_shaders.c \
-	util/u_slab.c \
-	util/u_snprintf.c \
-	util/u_staging.c \
-	util/u_surface.c \
-	util/u_surfaces.c \
-	util/u_texture.c \
-	util/u_tile.c \
-	util/u_transfer.c \
-	util/u_resource.c \
-	util/u_upload_mgr.c \
-	util/u_vbuf_mgr.c \
-	vl/vl_csc.c \
-	vl/vl_compositor.c \
-	vl/vl_decoder.c \
-	vl/vl_mpeg12_decoder.c \
-	vl/vl_mpeg12_bitstream.c \
-	vl/vl_zscan.c \
-        vl/vl_idct.c \
-	vl/vl_mc.c \
-        vl/vl_vertex_buffers.c \
-        vl/vl_video_buffer.c
-
-GALLIVM_SOURCES = \
-        gallivm/lp_bld_arit.c \
-        gallivm/lp_bld_assert.c \
-        gallivm/lp_bld_bitarit.c \
-        gallivm/lp_bld_const.c \
-        gallivm/lp_bld_conv.c \
-        gallivm/lp_bld_flow.c \
-        gallivm/lp_bld_format_aos.c \
-        gallivm/lp_bld_format_soa.c \
-        gallivm/lp_bld_format_yuv.c \
-        gallivm/lp_bld_gather.c \
-        gallivm/lp_bld_init.c \
-        gallivm/lp_bld_intr.c \
-        gallivm/lp_bld_logic.c \
-        gallivm/lp_bld_pack.c \
-        gallivm/lp_bld_printf.c \
-        gallivm/lp_bld_quad.c \
-        gallivm/lp_bld_sample.c \
-        gallivm/lp_bld_sample_aos.c \
-        gallivm/lp_bld_sample_soa.c \
-        gallivm/lp_bld_struct.c \
-        gallivm/lp_bld_swizzle.c \
-        gallivm/lp_bld_tgsi_aos.c \
-        gallivm/lp_bld_tgsi_info.c \
-        gallivm/lp_bld_tgsi_soa.c \
-        gallivm/lp_bld_type.c \
-        draw/draw_llvm.c \
-        draw/draw_llvm_sample.c \
-        draw/draw_llvm_translate.c \
-        draw/draw_vs_llvm.c \
-        draw/draw_pt_fetch_shade_pipeline_llvm.c
-
-GALLIVM_CPP_SOURCES = \
-	gallivm/lp_bld_debug.cpp \
-	gallivm/lp_bld_misc.cpp
-
-GENERATED_SOURCES = \
-	indices/u_indices_gen.c \
-	indices/u_unfilled_gen.c \
-	util/u_format_srgb.c \
-	util/u_format_table.c \
-	util/u_half.c
+# get C_SOURCES and GENERATED_SOURCES
+include $(LOCAL_PATH)/Makefile.sources
 
 include $(CLEAR_VARS)
 
-LOCAL_SRC_FILES := $(filter-out $(GENERATED_SOURCES), $(C_SOURCES))
+LOCAL_SRC_FILES := $(C_SOURCES)
 
 LOCAL_C_INCLUDES := $(GALLIUM_TOP)/auxiliary/util
 
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 7dae7bc908b..896c058fde9 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -3,205 +3,10 @@ include $(TOP)/configs/current
 
 LIBNAME = gallium
 
-C_SOURCES = \
-	cso_cache/cso_cache.c \
-	cso_cache/cso_context.c \
-	cso_cache/cso_hash.c \
-	draw/draw_context.c \
-	draw/draw_fs.c \
-	draw/draw_gs.c \
-	draw/draw_pipe.c \
-	draw/draw_pipe_aaline.c \
-	draw/draw_pipe_aapoint.c \
-	draw/draw_pipe_clip.c \
-	draw/draw_pipe_cull.c \
-	draw/draw_pipe_flatshade.c \
-	draw/draw_pipe_offset.c \
-	draw/draw_pipe_pstipple.c \
-	draw/draw_pipe_stipple.c \
-	draw/draw_pipe_twoside.c \
-	draw/draw_pipe_unfilled.c \
-	draw/draw_pipe_util.c \
-	draw/draw_pipe_validate.c \
-	draw/draw_pipe_vbuf.c \
-	draw/draw_pipe_wide_line.c \
-	draw/draw_pipe_wide_point.c \
-	draw/draw_pt.c \
-	draw/draw_pt_emit.c \
-	draw/draw_pt_fetch.c \
-	draw/draw_pt_fetch_emit.c \
-	draw/draw_pt_fetch_shade_emit.c \
-	draw/draw_pt_fetch_shade_pipeline.c \
-	draw/draw_pt_post_vs.c \
-	draw/draw_pt_so_emit.c \
-	draw/draw_pt_util.c \
-	draw/draw_pt_vsplit.c \
-	draw/draw_vertex.c \
-	draw/draw_vs.c \
-	draw/draw_vs_aos.c \
-	draw/draw_vs_aos_io.c \
-	draw/draw_vs_aos_machine.c \
-	draw/draw_vs_exec.c \
-	draw/draw_vs_ppc.c \
-	draw/draw_vs_sse.c \
-	draw/draw_vs_variant.c \
-	indices/u_indices_gen.c \
-	indices/u_unfilled_gen.c \
-	os/os_misc.c \
-	os/os_stream.c \
-	os/os_stream_log.c \
-	os/os_stream_null.c \
-	os/os_stream_stdc.c \
-	os/os_stream_str.c \
-	os/os_time.c \
-	pipebuffer/pb_buffer_fenced.c \
-	pipebuffer/pb_buffer_malloc.c \
-	pipebuffer/pb_bufmgr_alt.c \
-	pipebuffer/pb_bufmgr_cache.c \
-	pipebuffer/pb_bufmgr_debug.c \
-	pipebuffer/pb_bufmgr_mm.c \
-	pipebuffer/pb_bufmgr_ondemand.c \
-	pipebuffer/pb_bufmgr_pool.c \
-	pipebuffer/pb_bufmgr_slab.c \
-	pipebuffer/pb_validate.c \
-	rbug/rbug_connection.c \
-	rbug/rbug_context.c \
-	rbug/rbug_core.c \
-	rbug/rbug_demarshal.c \
-	rbug/rbug_texture.c \
-	rbug/rbug_shader.c \
-	rtasm/rtasm_cpu.c \
-	rtasm/rtasm_execmem.c \
-	rtasm/rtasm_ppc.c \
-	rtasm/rtasm_ppc_spe.c \
-	rtasm/rtasm_x86sse.c \
-	tgsi/tgsi_build.c \
-	tgsi/tgsi_dump.c \
-	tgsi/tgsi_exec.c \
-	tgsi/tgsi_info.c \
-	tgsi/tgsi_iterate.c \
-	tgsi/tgsi_parse.c \
-	tgsi/tgsi_ppc.c \
-	tgsi/tgsi_sanity.c \
-	tgsi/tgsi_scan.c \
-	tgsi/tgsi_sse2.c \
-	tgsi/tgsi_text.c \
-	tgsi/tgsi_transform.c \
-	tgsi/tgsi_ureg.c \
-	tgsi/tgsi_util.c \
-	translate/translate.c \
-	translate/translate_cache.c \
-	translate/translate_generic.c \
-	translate/translate_sse.c \
-	util/u_debug.c \
-	util/u_debug_describe.c \
-	util/u_debug_refcnt.c \
-	util/u_debug_stack.c \
-	util/u_debug_symbol.c \
-	util/u_dump_defines.c \
-	util/u_dump_state.c \
-	util/u_bitmask.c \
-	util/u_blit.c \
-	util/u_blitter.c \
-	util/u_cache.c \
-	util/u_caps.c \
-	util/u_cpu_detect.c \
-	util/u_dl.c \
-	util/u_draw.c \
-	util/u_draw_quad.c \
-	util/u_format.c \
-	util/u_format_other.c \
-	util/u_format_latc.c \
-	util/u_format_s3tc.c \
-	util/u_format_rgtc.c \
-	util/u_format_srgb.c \
-	util/u_format_table.c \
-	util/u_format_tests.c \
-	util/u_format_yuv.c \
-	util/u_format_zs.c \
-	util/u_framebuffer.c \
-	util/u_gen_mipmap.c \
-	util/u_half.c \
-	util/u_handle_table.c \
-	util/u_hash.c \
-	util/u_hash_table.c \
-	util/u_index_modify.c \
-	util/u_keymap.c \
-	util/u_linear.c \
-	util/u_linkage.c \
-	util/u_network.c \
-	util/u_math.c \
-	util/u_mm.c \
-	util/u_pstipple.c \
-	util/u_rect.c \
-	util/u_ringbuffer.c \
-	util/u_sampler.c \
-	util/u_simple_shaders.c \
-	util/u_slab.c \
-	util/u_snprintf.c \
-	util/u_staging.c \
-	util/u_surface.c \
-	util/u_surfaces.c \
-	util/u_texture.c \
-	util/u_tile.c \
-	util/u_transfer.c \
-	util/u_resource.c \
-	util/u_upload_mgr.c \
-	util/u_vbuf_mgr.c \
-	vl/vl_csc.c \
-	vl/vl_compositor.c \
-	vl/vl_decoder.c \
-	vl/vl_mpeg12_decoder.c \
-	vl/vl_mpeg12_bitstream.c \
-	vl/vl_zscan.c \
-        vl/vl_idct.c \
-	vl/vl_mc.c \
-        vl/vl_vertex_buffers.c \
-        vl/vl_video_buffer.c
-
-GALLIVM_SOURCES = \
-        gallivm/lp_bld_arit.c \
-        gallivm/lp_bld_assert.c \
-        gallivm/lp_bld_bitarit.c \
-        gallivm/lp_bld_const.c \
-        gallivm/lp_bld_conv.c \
-        gallivm/lp_bld_flow.c \
-        gallivm/lp_bld_format_aos.c \
-        gallivm/lp_bld_format_soa.c \
-        gallivm/lp_bld_format_yuv.c \
-        gallivm/lp_bld_gather.c \
-        gallivm/lp_bld_init.c \
-        gallivm/lp_bld_intr.c \
-        gallivm/lp_bld_logic.c \
-        gallivm/lp_bld_pack.c \
-        gallivm/lp_bld_printf.c \
-        gallivm/lp_bld_quad.c \
-        gallivm/lp_bld_sample.c \
-        gallivm/lp_bld_sample_aos.c \
-        gallivm/lp_bld_sample_soa.c \
-        gallivm/lp_bld_struct.c \
-        gallivm/lp_bld_swizzle.c \
-        gallivm/lp_bld_tgsi_aos.c \
-        gallivm/lp_bld_tgsi_info.c \
-        gallivm/lp_bld_tgsi_soa.c \
-        gallivm/lp_bld_type.c \
-        draw/draw_llvm.c \
-        draw/draw_llvm_sample.c \
-        draw/draw_llvm_translate.c \
-        draw/draw_vs_llvm.c \
-        draw/draw_pt_fetch_shade_pipeline_llvm.c
-
-GALLIVM_CPP_SOURCES = \
-	gallivm/lp_bld_debug.cpp \
-	gallivm/lp_bld_misc.cpp
-
-GENERATED_SOURCES = \
-	indices/u_indices_gen.c \
-	indices/u_unfilled_gen.c \
-	util/u_format_srgb.c \
-	util/u_format_table.c \
-	util/u_half.c
+# get source lists
+include Makefile.sources
 
+C_SOURCES += $(GENERATED_SOURCES)
 
 ifeq ($(MESA_LLVM),1)
 C_SOURCES += \
diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
new file mode 100644
index 00000000000..2b1ff5e5e86
--- /dev/null
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -0,0 +1,193 @@
+C_SOURCES := \
+	cso_cache/cso_cache.c \
+	cso_cache/cso_context.c \
+	cso_cache/cso_hash.c \
+	draw/draw_context.c \
+	draw/draw_fs.c \
+	draw/draw_gs.c \
+	draw/draw_pipe.c \
+	draw/draw_pipe_aaline.c \
+	draw/draw_pipe_aapoint.c \
+	draw/draw_pipe_clip.c \
+	draw/draw_pipe_cull.c \
+	draw/draw_pipe_flatshade.c \
+	draw/draw_pipe_offset.c \
+	draw/draw_pipe_pstipple.c \
+	draw/draw_pipe_stipple.c \
+	draw/draw_pipe_twoside.c \
+	draw/draw_pipe_unfilled.c \
+	draw/draw_pipe_util.c \
+	draw/draw_pipe_validate.c \
+	draw/draw_pipe_vbuf.c \
+	draw/draw_pipe_wide_line.c \
+	draw/draw_pipe_wide_point.c \
+	draw/draw_pt.c \
+	draw/draw_pt_emit.c \
+	draw/draw_pt_fetch.c \
+	draw/draw_pt_fetch_emit.c \
+	draw/draw_pt_fetch_shade_emit.c \
+	draw/draw_pt_fetch_shade_pipeline.c \
+	draw/draw_pt_post_vs.c \
+	draw/draw_pt_so_emit.c \
+	draw/draw_pt_util.c \
+	draw/draw_pt_vsplit.c \
+	draw/draw_vertex.c \
+	draw/draw_vs.c \
+	draw/draw_vs_aos.c \
+	draw/draw_vs_aos_io.c \
+	draw/draw_vs_aos_machine.c \
+	draw/draw_vs_exec.c \
+	draw/draw_vs_ppc.c \
+	draw/draw_vs_sse.c \
+	draw/draw_vs_variant.c \
+	os/os_misc.c \
+	os/os_stream.c \
+	os/os_stream_log.c \
+	os/os_stream_null.c \
+	os/os_stream_stdc.c \
+	os/os_stream_str.c \
+	os/os_time.c \
+	pipebuffer/pb_buffer_fenced.c \
+	pipebuffer/pb_buffer_malloc.c \
+	pipebuffer/pb_bufmgr_alt.c \
+	pipebuffer/pb_bufmgr_cache.c \
+	pipebuffer/pb_bufmgr_debug.c \
+	pipebuffer/pb_bufmgr_mm.c \
+	pipebuffer/pb_bufmgr_ondemand.c \
+	pipebuffer/pb_bufmgr_pool.c \
+	pipebuffer/pb_bufmgr_slab.c \
+	pipebuffer/pb_validate.c \
+	rbug/rbug_connection.c \
+	rbug/rbug_context.c \
+	rbug/rbug_core.c \
+	rbug/rbug_demarshal.c \
+	rbug/rbug_texture.c \
+	rbug/rbug_shader.c \
+	rtasm/rtasm_cpu.c \
+	rtasm/rtasm_execmem.c \
+	rtasm/rtasm_ppc.c \
+	rtasm/rtasm_ppc_spe.c \
+	rtasm/rtasm_x86sse.c \
+	tgsi/tgsi_build.c \
+	tgsi/tgsi_dump.c \
+	tgsi/tgsi_exec.c \
+	tgsi/tgsi_info.c \
+	tgsi/tgsi_iterate.c \
+	tgsi/tgsi_parse.c \
+	tgsi/tgsi_ppc.c \
+	tgsi/tgsi_sanity.c \
+	tgsi/tgsi_scan.c \
+	tgsi/tgsi_sse2.c \
+	tgsi/tgsi_text.c \
+	tgsi/tgsi_transform.c \
+	tgsi/tgsi_ureg.c \
+	tgsi/tgsi_util.c \
+	translate/translate.c \
+	translate/translate_cache.c \
+	translate/translate_generic.c \
+	translate/translate_sse.c \
+	util/u_debug.c \
+	util/u_debug_describe.c \
+	util/u_debug_refcnt.c \
+	util/u_debug_stack.c \
+	util/u_debug_symbol.c \
+	util/u_dump_defines.c \
+	util/u_dump_state.c \
+	util/u_bitmask.c \
+	util/u_blit.c \
+	util/u_blitter.c \
+	util/u_cache.c \
+	util/u_caps.c \
+	util/u_cpu_detect.c \
+	util/u_dl.c \
+	util/u_draw.c \
+	util/u_draw_quad.c \
+	util/u_format.c \
+	util/u_format_other.c \
+	util/u_format_latc.c \
+	util/u_format_s3tc.c \
+	util/u_format_rgtc.c \
+	util/u_format_tests.c \
+	util/u_format_yuv.c \
+	util/u_format_zs.c \
+	util/u_framebuffer.c \
+	util/u_gen_mipmap.c \
+	util/u_handle_table.c \
+	util/u_hash.c \
+	util/u_hash_table.c \
+	util/u_index_modify.c \
+	util/u_keymap.c \
+	util/u_linear.c \
+	util/u_linkage.c \
+	util/u_network.c \
+	util/u_math.c \
+	util/u_mm.c \
+	util/u_pstipple.c \
+	util/u_rect.c \
+	util/u_ringbuffer.c \
+	util/u_sampler.c \
+	util/u_simple_shaders.c \
+	util/u_slab.c \
+	util/u_snprintf.c \
+	util/u_staging.c \
+	util/u_surface.c \
+	util/u_surfaces.c \
+	util/u_texture.c \
+	util/u_tile.c \
+	util/u_transfer.c \
+	util/u_resource.c \
+	util/u_upload_mgr.c \
+	util/u_vbuf_mgr.c \
+	vl/vl_csc.c \
+	vl/vl_compositor.c \
+	vl/vl_decoder.c \
+	vl/vl_mpeg12_decoder.c \
+	vl/vl_mpeg12_bitstream.c \
+	vl/vl_zscan.c \
+        vl/vl_idct.c \
+	vl/vl_mc.c \
+        vl/vl_vertex_buffers.c \
+        vl/vl_video_buffer.c
+
+GENERATED_SOURCES := \
+	indices/u_indices_gen.c \
+	indices/u_unfilled_gen.c \
+	util/u_format_srgb.c \
+	util/u_format_table.c \
+	util/u_half.c
+
+GALLIVM_SOURCES := \
+        gallivm/lp_bld_arit.c \
+        gallivm/lp_bld_assert.c \
+        gallivm/lp_bld_bitarit.c \
+        gallivm/lp_bld_const.c \
+        gallivm/lp_bld_conv.c \
+        gallivm/lp_bld_flow.c \
+        gallivm/lp_bld_format_aos.c \
+        gallivm/lp_bld_format_soa.c \
+        gallivm/lp_bld_format_yuv.c \
+        gallivm/lp_bld_gather.c \
+        gallivm/lp_bld_init.c \
+        gallivm/lp_bld_intr.c \
+        gallivm/lp_bld_logic.c \
+        gallivm/lp_bld_pack.c \
+        gallivm/lp_bld_printf.c \
+        gallivm/lp_bld_quad.c \
+        gallivm/lp_bld_sample.c \
+        gallivm/lp_bld_sample_aos.c \
+        gallivm/lp_bld_sample_soa.c \
+        gallivm/lp_bld_struct.c \
+        gallivm/lp_bld_swizzle.c \
+        gallivm/lp_bld_tgsi_aos.c \
+        gallivm/lp_bld_tgsi_info.c \
+        gallivm/lp_bld_tgsi_soa.c \
+        gallivm/lp_bld_type.c \
+        draw/draw_llvm.c \
+        draw/draw_llvm_sample.c \
+        draw/draw_llvm_translate.c \
+        draw/draw_vs_llvm.c \
+        draw/draw_pt_fetch_shade_pipeline_llvm.c
+
+GALLIVM_CPP_SOURCES := \
+	gallivm/lp_bld_debug.cpp \
+	gallivm/lp_bld_misc.cpp
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index d18f55f1644..e00040d97ab 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -47,201 +47,16 @@ env.Depends('util/u_format_table.c', [
     'util/u_format_pack.py', 
 ])
 
-source = [
-    'cso_cache/cso_cache.c',
-    'cso_cache/cso_context.c',
-    'cso_cache/cso_hash.c',
-    'draw/draw_context.c',
-    'draw/draw_fs.c',
-    'draw/draw_gs.c',
-    'draw/draw_pipe.c',
-    'draw/draw_pipe_aaline.c',
-    'draw/draw_pipe_aapoint.c',
-    'draw/draw_pipe_clip.c',
-    'draw/draw_pipe_cull.c',
-    'draw/draw_pipe_flatshade.c',
-    'draw/draw_pipe_offset.c',
-    'draw/draw_pipe_pstipple.c',
-    'draw/draw_pipe_stipple.c',
-    'draw/draw_pipe_twoside.c',
-    'draw/draw_pipe_unfilled.c',
-    'draw/draw_pipe_util.c',
-    'draw/draw_pipe_validate.c',
-    'draw/draw_pipe_vbuf.c',
-    'draw/draw_pipe_wide_line.c',
-    'draw/draw_pipe_wide_point.c',
-    'draw/draw_pt.c',
-    'draw/draw_pt_emit.c',
-    'draw/draw_pt_fetch.c',
-    'draw/draw_pt_fetch_emit.c',
-    'draw/draw_pt_fetch_shade_emit.c',
-    'draw/draw_pt_fetch_shade_pipeline.c',
-    'draw/draw_pt_post_vs.c',
-    'draw/draw_pt_so_emit.c',
-    'draw/draw_pt_util.c',
-    'draw/draw_pt_vsplit.c',
-    'draw/draw_vertex.c',
-    'draw/draw_vs.c',
-    'draw/draw_vs_aos.c',
-    'draw/draw_vs_aos_io.c',
-    'draw/draw_vs_aos_machine.c',
-    'draw/draw_vs_exec.c',
-    'draw/draw_vs_ppc.c',
-    'draw/draw_vs_sse.c',
-    'draw/draw_vs_variant.c',
-    #'indices/u_indices.c',
-    #'indices/u_unfilled_indices.c',
-    'indices/u_indices_gen.c',
-    'indices/u_unfilled_gen.c',
-    'os/os_misc.c',
-    'os/os_stream.c',
-    'os/os_stream_log.c',
-    'os/os_stream_null.c',
-    'os/os_stream_stdc.c',
-    'os/os_stream_str.c',
-    'os/os_time.c',
-    'pipebuffer/pb_buffer_fenced.c',
-    'pipebuffer/pb_buffer_malloc.c',
-    'pipebuffer/pb_bufmgr_alt.c',
-    'pipebuffer/pb_bufmgr_cache.c',
-    'pipebuffer/pb_bufmgr_debug.c',
-    'pipebuffer/pb_bufmgr_mm.c',
-    'pipebuffer/pb_bufmgr_ondemand.c',
-    'pipebuffer/pb_bufmgr_pool.c',
-    'pipebuffer/pb_bufmgr_slab.c',
-    'pipebuffer/pb_validate.c',
-    'rbug/rbug_connection.c',
-    'rbug/rbug_context.c',
-    'rbug/rbug_core.c',
-    'rbug/rbug_demarshal.c',
-    'rbug/rbug_shader.c',
-    'rbug/rbug_texture.c',
-    'rtasm/rtasm_cpu.c',
-    'rtasm/rtasm_execmem.c',
-    'rtasm/rtasm_ppc.c',
-    'rtasm/rtasm_ppc_spe.c',
-    'rtasm/rtasm_x86sse.c',
-    'tgsi/tgsi_build.c',
-    'tgsi/tgsi_dump.c',
-    'tgsi/tgsi_exec.c',
-    'tgsi/tgsi_info.c',
-    'tgsi/tgsi_iterate.c',
-    'tgsi/tgsi_parse.c',
-    'tgsi/tgsi_ppc.c',
-    'tgsi/tgsi_sanity.c',
-    'tgsi/tgsi_scan.c',
-    'tgsi/tgsi_sse2.c',
-    'tgsi/tgsi_text.c',
-    'tgsi/tgsi_transform.c',
-    'tgsi/tgsi_ureg.c',
-    'tgsi/tgsi_util.c',
-    'translate/translate.c',
-    'translate/translate_cache.c',
-    'translate/translate_generic.c',
-    'translate/translate_sse.c',
-    'util/u_bitmask.c',
-    'util/u_blit.c',
-    'util/u_blitter.c',
-    'util/u_cache.c',
-    'util/u_caps.c',
-    'util/u_cpu_detect.c',
-    'util/u_debug.c',
-    'util/u_debug_describe.c',
-    'util/u_debug_memory.c',
-    'util/u_debug_refcnt.c',
-    'util/u_debug_stack.c',
-    'util/u_debug_symbol.c',
-    'util/u_dump_defines.c',
-    'util/u_dump_state.c',
-    'util/u_dl.c',
-    'util/u_draw.c',
-    'util/u_draw_quad.c',
-    'util/u_format.c',
-    'util/u_format_other.c',
-    'util/u_format_latc.c',
-    'util/u_format_s3tc.c',
-    'util/u_format_rgtc.c',
-    'util/u_format_srgb.c',
-    'util/u_format_table.c',
-    'util/u_format_tests.c',
-    'util/u_format_yuv.c',
-    'util/u_format_zs.c',
-    'util/u_framebuffer.c',
-    'util/u_gen_mipmap.c',
-    'util/u_half.c',
-    'util/u_handle_table.c',
-    'util/u_hash.c',
-    'util/u_hash_table.c',
-    'util/u_index_modify.c',
-    'util/u_keymap.c',
-    'util/u_linear.c',
-    'util/u_linkage.c',
-    'util/u_network.c',
-    'util/u_math.c',
-    'util/u_mm.c',
-    'util/u_pstipple.c',
-    'util/u_rect.c',
-    'util/u_resource.c',
-    'util/u_ringbuffer.c',
-    'util/u_sampler.c',
-    'util/u_simple_shaders.c',
-    'util/u_slab.c',
-    'util/u_snprintf.c',
-    'util/u_staging.c',
-    'util/u_surface.c',
-    'util/u_surfaces.c',
-    'util/u_texture.c',
-    'util/u_tile.c',
-    'util/u_transfer.c',
-    'util/u_upload_mgr.c',
-    'util/u_vbuf_mgr.c',
-    'vl/vl_csc.c',
-    'vl/vl_compositor.c',
-    'vl/vl_decoder.c',
-    'vl/vl_mpeg12_decoder.c',
-    'vl/vl_mpeg12_bitstream.c',
-    'vl/vl_zscan.c',
-    'vl/vl_idct.c',
-    'vl/vl_mc.c',
-    'vl/vl_vertex_buffers.c',
-    'vl/vl_video_buffer.c',
-]
+source = env.ParseSourceList('Makefile.sources', [
+    'C_SOURCES',
+    'GENERATED_SOURCES'
+])
 
 if env['llvm']:
-    source += [
-        'gallivm/lp_bld_arit.c',
-        'gallivm/lp_bld_assert.c',
-        'gallivm/lp_bld_bitarit.c',
-        'gallivm/lp_bld_const.c',
-        'gallivm/lp_bld_conv.c',
-        'gallivm/lp_bld_debug.cpp',
-        'gallivm/lp_bld_flow.c',
-        'gallivm/lp_bld_format_aos.c',
-        'gallivm/lp_bld_format_soa.c',
-        'gallivm/lp_bld_format_yuv.c',
-        'gallivm/lp_bld_gather.c',
-        'gallivm/lp_bld_init.c',
-        'gallivm/lp_bld_intr.c',
-        'gallivm/lp_bld_logic.c',
-        'gallivm/lp_bld_misc.cpp',
-        'gallivm/lp_bld_pack.c',
-        'gallivm/lp_bld_printf.c',
-        'gallivm/lp_bld_quad.c',
-        'gallivm/lp_bld_sample.c',
-        'gallivm/lp_bld_sample_aos.c',
-        'gallivm/lp_bld_sample_soa.c',
-        'gallivm/lp_bld_struct.c',
-        'gallivm/lp_bld_swizzle.c',
-        'gallivm/lp_bld_tgsi_aos.c',
-        'gallivm/lp_bld_tgsi_info.c',
-        'gallivm/lp_bld_tgsi_soa.c',
-        'gallivm/lp_bld_type.c',
-        'draw/draw_llvm.c',
-        'draw/draw_llvm_sample.c',
-        'draw/draw_llvm_translate.c',
-        'draw/draw_pt_fetch_shade_pipeline_llvm.c',
-        'draw/draw_vs_llvm.c'
-    ]
+    source += env.ParseSourceList('Makefile.sources', [
+        'GALLIVM_SOURCES',
+        'GALLIVM_CPP_SOURCES'
+    ])
 
 gallium = env.ConvenienceLibrary(
     target = 'gallium',

From 29d7a0bb16dba1416e3a63f7f44cf82e307ac46a Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Fri, 25 Mar 2011 03:10:51 +0800
Subject: [PATCH 516/600] mesa: call _mesa_set_vp_override in glDrawTex*

The driver may install its own vertex shader.  _mesa_set_vp_override
must be called so that core mesa can generate correct fragment program..

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/main/drawtex.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/mesa/main/drawtex.c b/src/mesa/main/drawtex.c
index 2089cdfcef9..83485a928d8 100644
--- a/src/mesa/main/drawtex.c
+++ b/src/mesa/main/drawtex.c
@@ -45,11 +45,15 @@ draw_texture(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
       return;
    }
 
+   _mesa_set_vp_override(ctx, GL_TRUE);
+
    if (ctx->NewState)
       _mesa_update_state(ctx);
 
    ASSERT(ctx->Driver.DrawTex);
    ctx->Driver.DrawTex(ctx, x, y, z, width, height);
+
+   _mesa_set_vp_override(ctx, GL_FALSE);
 }
 
 

From 8f26b59f53d6d80bf7d3c39a4dd3c438a2c305a4 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <m.b.lankhorst@gmail.com>
Date: Fri, 29 Jul 2011 19:37:51 +0200
Subject: [PATCH 517/600] st/xorg: Advertise support for XvMC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Formats were based on a patch sent to xf86-video-nouveau by Bryan Cain

Signed-off-by: Maarten Lankhorst <m.b.lankhorst@gmail.com>

[Michel Dänzer: Add xorg_xvmc.c to SConscript.]
---
 src/gallium/state_trackers/xorg/SConscript    |   1 +
 .../state_trackers/xorg/xorg_tracker.h        |   7 ++
 src/gallium/state_trackers/xorg/xorg_xv.c     |   2 +
 src/gallium/state_trackers/xorg/xorg_xvmc.c   | 119 ++++++++++++++++++
 4 files changed, 129 insertions(+)
 create mode 100644 src/gallium/state_trackers/xorg/xorg_xvmc.c

diff --git a/src/gallium/state_trackers/xorg/SConscript b/src/gallium/state_trackers/xorg/SConscript
index 4ea4ec4ee8b..1768f701e48 100644
--- a/src/gallium/state_trackers/xorg/SConscript
+++ b/src/gallium/state_trackers/xorg/SConscript
@@ -32,6 +32,7 @@ sources = [
     'xorg_output.c',
     'xorg_renderer.c',
     'xorg_xv.c',
+    'xorg_xvmc.c',
 ]
 
 st_xorg = env.ConvenienceLibrary(
diff --git a/src/gallium/state_trackers/xorg/xorg_tracker.h b/src/gallium/state_trackers/xorg/xorg_tracker.h
index 664e8c75730..84a3a2fa4e2 100644
--- a/src/gallium/state_trackers/xorg/xorg_tracker.h
+++ b/src/gallium/state_trackers/xorg/xorg_tracker.h
@@ -222,4 +222,11 @@ void
 xorg_xv_init(ScreenPtr pScreen);
 
 
+/***********************************************************************
+ * xorg_xvmc.c
+ */
+void
+xorg_xvmc_init(ScreenPtr pScreen, char *name);
+
+
 #endif /* _XORG_TRACKER_H_ */
diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c
index af4992fc2ed..67fd6dfb501 100644
--- a/src/gallium/state_trackers/xorg/xorg_xv.c
+++ b/src/gallium/state_trackers/xorg/xorg_xv.c
@@ -750,6 +750,8 @@ xorg_xv_init(ScreenPtr pScreen)
 
    if (num_adaptors) {
       xf86XVScreenInit(pScreen, adaptors, num_adaptors);
+      if (textured_adapter)
+         xorg_xvmc_init(pScreen, textured_adapter->name);
    } else {
       xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
                  "Disabling Xv because no adaptors could be initialized.\n");
diff --git a/src/gallium/state_trackers/xorg/xorg_xvmc.c b/src/gallium/state_trackers/xorg/xorg_xvmc.c
new file mode 100644
index 00000000000..0f3f3f00907
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xorg_xvmc.c
@@ -0,0 +1,119 @@
+#include "xorg_tracker.h"
+
+#include <xf86.h>
+#include <xf86xv.h>
+#include <xf86xvmc.h>
+#include <X11/extensions/Xv.h>
+#include <X11/extensions/XvMC.h>
+#include <fourcc.h>
+
+#define FOURCC_RGB 0x0000003
+#define XVIMAGE_RGB								\
+{										\
+	FOURCC_RGB,								\
+	XvRGB,									\
+	LSBFirst,								\
+	{									\
+		'R', 'G', 'B', 0x00,						\
+		0x00,0x00,0x00,0x10,0x80,0x00,0x00,0xAA,0x00,0x38,0x9B,0x71	\
+	},									\
+	32,									\
+	XvPacked,								\
+	1,									\
+	24, 0x00FF0000, 0x0000FF00, 0x000000FF,					\
+	0, 0, 0,								\
+	0, 0, 0,								\
+	0, 0, 0,								\
+	{									\
+		'B','G','R','X',						\
+		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0		\
+	},									\
+	XvTopToBottom								\
+}
+
+static int subpicture_index_list[] = {
+   FOURCC_RGB,
+   FOURCC_IA44,
+   FOURCC_AI44
+};
+
+static XF86MCImageIDList subpicture_list =
+{
+   sizeof(subpicture_index_list)/sizeof(*subpicture_index_list),
+   subpicture_index_list
+};
+
+static XF86MCSurfaceInfoRec yv12_mpeg2_surface =
+{
+   FOURCC_I420,
+   XVMC_CHROMA_FORMAT_420,
+   0,
+   2048, 2048, 2048, 2048,
+   XVMC_IDCT | XVMC_MPEG_2,
+   XVMC_SUBPICTURE_INDEPENDENT_SCALING | XVMC_BACKEND_SUBPICTURE,
+   &subpicture_list
+};
+
+static const XF86MCSurfaceInfoRec uyvy_mpeg2_surface =
+{
+   FOURCC_UYVY,
+   XVMC_CHROMA_FORMAT_422,
+   0,
+   2048, 2048, 2048, 2048,
+   XVMC_IDCT | XVMC_MPEG_2,
+   XVMC_SUBPICTURE_INDEPENDENT_SCALING | XVMC_BACKEND_SUBPICTURE,
+   &subpicture_list
+};
+
+static XF86MCSurfaceInfoPtr surfaces[] =
+{
+   (XF86MCSurfaceInfoPtr)&yv12_mpeg2_surface,
+   (XF86MCSurfaceInfoPtr)&uyvy_mpeg2_surface
+};
+
+static const XF86ImageRec rgb_subpicture = XVIMAGE_RGB;
+static const XF86ImageRec ia44_subpicture = XVIMAGE_IA44;
+static const XF86ImageRec ai44_subpicture = XVIMAGE_AI44;
+
+static XF86ImagePtr subpictures[] =
+{
+   (XF86ImagePtr)&rgb_subpicture,
+   (XF86ImagePtr)&ia44_subpicture,
+   (XF86ImagePtr)&ai44_subpicture
+};
+
+static const XF86MCAdaptorRec adaptor_template =
+{
+   "",
+   sizeof(surfaces)/sizeof(*surfaces),
+   surfaces,
+   sizeof(subpictures)/sizeof(*subpictures),
+   subpictures,
+   (xf86XvMCCreateContextProcPtr)NULL,
+   (xf86XvMCDestroyContextProcPtr)NULL,
+   (xf86XvMCCreateSurfaceProcPtr)NULL,
+   (xf86XvMCDestroySurfaceProcPtr)NULL,
+   (xf86XvMCCreateSubpictureProcPtr)NULL,
+   (xf86XvMCDestroySubpictureProcPtr)NULL
+};
+
+void
+xorg_xvmc_init(ScreenPtr pScreen, char *name)
+{
+   ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+   XF86MCAdaptorPtr adaptorXvMC = xf86XvMCCreateAdaptorRec();
+   if (!adaptorXvMC)
+      return;
+
+   *adaptorXvMC = adaptor_template;
+   adaptorXvMC->name = name;
+   xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+              "[XvMC] Associated with %s.\n", name);
+   if (!xf86XvMCScreenInit(pScreen, 1, &adaptorXvMC))
+      xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+                 "[XvMC] Failed to initialize extension.\n");
+   else
+      xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+                 "[XvMC] Extension initialized.\n");
+   xf86XvMCDestroyAdaptorRec(adaptorXvMC);
+}

From 1e3bcbdf31f09666ba358f35ff9486faee3642ca Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 25 Feb 2011 14:45:33 -0800
Subject: [PATCH 518/600] glsl: Add a new ir_txs (textureSize) opcode to
 ir_texture.

One unique aspect of TXS is that it doesn't have a coordinate.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
---
 src/glsl/ir.cpp                              | 16 +++++----
 src/glsl/ir.h                                |  4 ++-
 src/glsl/ir_clone.cpp                        |  4 ++-
 src/glsl/ir_hv_accept.cpp                    |  9 +++--
 src/glsl/ir_print_visitor.cpp                | 21 ++++++-----
 src/glsl/ir_reader.cpp                       | 37 ++++++++++++--------
 src/glsl/ir_rvalue_visitor.cpp               |  1 +
 src/glsl/opt_tree_grafting.cpp               |  1 +
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |  6 +++-
 src/mesa/program/ir_to_mesa.cpp              |  1 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp   |  1 +
 11 files changed, 65 insertions(+), 36 deletions(-)

diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 6f8676ecceb..41ed4f114ca 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -1121,7 +1121,7 @@ ir_dereference::is_lvalue() const
 }
 
 
-const char *tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf" };
+const char *tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txs" };
 
 const char *ir_texture::opcode_string()
 {
@@ -1150,11 +1150,15 @@ ir_texture::set_sampler(ir_dereference *sampler, const glsl_type *type)
    this->sampler = sampler;
    this->type = type;
 
-   assert(sampler->type->sampler_type == (int) type->base_type);
-   if (sampler->type->sampler_shadow)
-      assert(type->vector_elements == 4 || type->vector_elements == 1);
-   else
-      assert(type->vector_elements == 4);
+   if (this->op == ir_txs) {
+      assert(type->base_type == GLSL_TYPE_INT);
+   } else {
+      assert(sampler->type->sampler_type == (int) type->base_type);
+      if (sampler->type->sampler_shadow)
+	 assert(type->vector_elements == 4 || type->vector_elements == 1);
+      else
+	 assert(type->vector_elements == 4);
+   }
 }
 
 
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 04fa97bf56f..990aaa16af3 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1212,7 +1212,8 @@ enum ir_texture_opcode {
    ir_txb,		/**< Texture look-up with LOD bias */
    ir_txl,		/**< Texture look-up with explicit LOD */
    ir_txd,		/**< Texture look-up with partial derivatvies */
-   ir_txf		/**< Texel fetch with explicit LOD */
+   ir_txf,		/**< Texel fetch with explicit LOD */
+   ir_txs		/**< Texture size */
 };
 
 
@@ -1233,6 +1234,7 @@ enum ir_texture_opcode {
  * (txl <type> <sampler> <coordinate> 0 1 ( ) <lod>)
  * (txd <type> <sampler> <coordinate> 0 1 ( ) (dPdx dPdy))
  * (txf <type> <sampler> <coordinate> 0       <lod>)
+ * (txs <type> <sampler> <lod>)
  */
 class ir_texture : public ir_rvalue {
 public:
diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp
index 069bb85e8de..f0757365dd2 100644
--- a/src/glsl/ir_clone.cpp
+++ b/src/glsl/ir_clone.cpp
@@ -222,7 +222,8 @@ ir_texture::clone(void *mem_ctx, struct hash_table *ht) const
    new_tex->type = this->type;
 
    new_tex->sampler = this->sampler->clone(mem_ctx, ht);
-   new_tex->coordinate = this->coordinate->clone(mem_ctx, ht);
+   if (this->coordinate)
+      new_tex->coordinate = this->coordinate->clone(mem_ctx, ht);
    if (this->projector)
       new_tex->projector = this->projector->clone(mem_ctx, ht);
    if (this->shadow_comparitor) {
@@ -240,6 +241,7 @@ ir_texture::clone(void *mem_ctx, struct hash_table *ht) const
       break;
    case ir_txl:
    case ir_txf:
+   case ir_txs:
       new_tex->lod_info.lod = this->lod_info.lod->clone(mem_ctx, ht);
       break;
    case ir_txd:
diff --git a/src/glsl/ir_hv_accept.cpp b/src/glsl/ir_hv_accept.cpp
index 4a607dc8749..d33fc85bf0a 100644
--- a/src/glsl/ir_hv_accept.cpp
+++ b/src/glsl/ir_hv_accept.cpp
@@ -171,9 +171,11 @@ ir_texture::accept(ir_hierarchical_visitor *v)
    if (s != visit_continue)
       return (s == visit_continue_with_parent) ? visit_continue : s;
 
-   s = this->coordinate->accept(v);
-   if (s != visit_continue)
-      return (s == visit_continue_with_parent) ? visit_continue : s;
+   if (this->coordinate) {
+      s = this->coordinate->accept(v);
+      if (s != visit_continue)
+	 return (s == visit_continue_with_parent) ? visit_continue : s;
+   }
 
    if (this->projector) {
       s = this->projector->accept(v);
@@ -203,6 +205,7 @@ ir_texture::accept(ir_hierarchical_visitor *v)
       break;
    case ir_txl:
    case ir_txf:
+   case ir_txs:
       s = this->lod_info.lod->accept(v);
       if (s != visit_continue)
 	 return (s == visit_continue_with_parent) ? visit_continue : s;
diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp
index 518910bd129..ea7858224bb 100644
--- a/src/glsl/ir_print_visitor.cpp
+++ b/src/glsl/ir_print_visitor.cpp
@@ -244,19 +244,21 @@ void ir_print_visitor::visit(ir_texture *ir)
    ir->sampler->accept(this);
    printf(" ");
 
-   ir->coordinate->accept(this);
+   if (ir->op != ir_txs) {
+      ir->coordinate->accept(this);
 
-   printf(" ");
+      printf(" ");
 
-   if (ir->offset != NULL) {
-      ir->offset->accept(this);
-   } else {
-      printf("0");
+      if (ir->offset != NULL) {
+	 ir->offset->accept(this);
+      } else {
+	 printf("0");
+      }
+
+      printf(" ");
    }
 
-   printf(" ");
-
-   if (ir->op != ir_txf) {
+   if (ir->op != ir_txf && ir->op != ir_txs) {
       if (ir->projector)
 	 ir->projector->accept(this);
       else
@@ -280,6 +282,7 @@ void ir_print_visitor::visit(ir_texture *ir)
       break;
    case ir_txl:
    case ir_txf:
+   case ir_txs:
       ir->lod_info.lod->accept(this);
       break;
    case ir_txd:
diff --git a/src/glsl/ir_reader.cpp b/src/glsl/ir_reader.cpp
index f3a621734ba..22009eebcb9 100644
--- a/src/glsl/ir_reader.cpp
+++ b/src/glsl/ir_reader.cpp
@@ -885,6 +885,8 @@ ir_reader::read_texture(s_expression *expr)
       { "tex", s_type, s_sampler, s_coord, s_offset, s_proj, s_shadow };
    s_pattern txf_pattern[] =
       { "txf", s_type, s_sampler, s_coord, s_offset, s_lod };
+   s_pattern txs_pattern[] =
+      { "txs", s_type, s_sampler, s_lod };
    s_pattern other_pattern[] =
       { tag, s_type, s_sampler, s_coord, s_offset, s_proj, s_shadow, s_lod };
 
@@ -892,6 +894,8 @@ ir_reader::read_texture(s_expression *expr)
       op = ir_tex;
    } else if (MATCH(expr, txf_pattern)) {
       op = ir_txf;
+   } else if (MATCH(expr, txs_pattern)) {
+      op = ir_txs;
    } else if (MATCH(expr, other_pattern)) {
       op = ir_texture::get_opcode(tag->value());
       if (op == -1)
@@ -920,25 +924,27 @@ ir_reader::read_texture(s_expression *expr)
    }
    tex->set_sampler(sampler, type);
 
-   // Read coordinate (any rvalue)
-   tex->coordinate = read_rvalue(s_coord);
-   if (tex->coordinate == NULL) {
-      ir_read_error(NULL, "when reading coordinate in (%s ...)",
-		    tex->opcode_string());
-      return NULL;
-   }
-
-   // Read texel offset - either 0 or an rvalue.
-   s_int *si_offset = SX_AS_INT(s_offset);
-   if (si_offset == NULL || si_offset->value() != 0) {
-      tex->offset = read_rvalue(s_offset);
-      if (tex->offset == NULL) {
-	 ir_read_error(s_offset, "expected 0 or an expression");
+   if (op != ir_txs) {
+      // Read coordinate (any rvalue)
+      tex->coordinate = read_rvalue(s_coord);
+      if (tex->coordinate == NULL) {
+	 ir_read_error(NULL, "when reading coordinate in (%s ...)",
+		       tex->opcode_string());
 	 return NULL;
       }
+
+      // Read texel offset - either 0 or an rvalue.
+      s_int *si_offset = SX_AS_INT(s_offset);
+      if (si_offset == NULL || si_offset->value() != 0) {
+	 tex->offset = read_rvalue(s_offset);
+	 if (tex->offset == NULL) {
+	    ir_read_error(s_offset, "expected 0 or an expression");
+	    return NULL;
+	 }
+      }
    }
 
-   if (op != ir_txf) {
+   if (op != ir_txf && op != ir_txs) {
       s_int *proj_as_int = SX_AS_INT(s_proj);
       if (proj_as_int && proj_as_int->value() == 1) {
 	 tex->projector = NULL;
@@ -973,6 +979,7 @@ ir_reader::read_texture(s_expression *expr)
       break;
    case ir_txl:
    case ir_txf:
+   case ir_txs:
       tex->lod_info.lod = read_rvalue(s_lod);
       if (tex->lod_info.lod == NULL) {
 	 ir_read_error(NULL, "when reading LOD in (%s ...)",
diff --git a/src/glsl/ir_rvalue_visitor.cpp b/src/glsl/ir_rvalue_visitor.cpp
index ed6c7cb6a1a..193bcd2d7bd 100644
--- a/src/glsl/ir_rvalue_visitor.cpp
+++ b/src/glsl/ir_rvalue_visitor.cpp
@@ -63,6 +63,7 @@ ir_rvalue_visitor::visit_leave(ir_texture *ir)
       break;
    case ir_txf:
    case ir_txl:
+   case ir_txs:
       handle_rvalue(&ir->lod_info.lod);
       break;
    case ir_txd:
diff --git a/src/glsl/opt_tree_grafting.cpp b/src/glsl/opt_tree_grafting.cpp
index 1ef940f9c72..22a1749b9dd 100644
--- a/src/glsl/opt_tree_grafting.cpp
+++ b/src/glsl/opt_tree_grafting.cpp
@@ -258,6 +258,7 @@ ir_tree_grafting_visitor::visit_enter(ir_texture *ir)
       break;
    case ir_txf:
    case ir_txl:
+   case ir_txs:
       if (do_graft(&ir->lod_info.lod))
 	 return visit_stop;
       break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 33ad12763f7..764351a34f9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -716,6 +716,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       inst = emit(FS_OPCODE_TXD, dst);
       break;
    case ir_txf:
+   case ir_txs:
       assert(!"GLSL 1.30 features unsupported");
       break;
    }
@@ -837,6 +838,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       break;
    }
    case ir_txf:
+   case ir_txs:
       assert(!"GLSL 1.30 features unsupported");
       break;
    }
@@ -926,6 +928,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       break;
    }
    case ir_txf:
+   case ir_txs:
       assert(!"GLSL 1.30 features unsupported");
       break;
    }
@@ -949,7 +952,8 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break;
    case ir_txl: inst = emit(FS_OPCODE_TXL, dst); break;
    case ir_txd: inst = emit(FS_OPCODE_TXD, dst); break;
-   case ir_txf: assert(!"TXF unsupported.");
+   case ir_txf: assert(!"TXF unsupported."); break;
+   case ir_txs: assert(!"TXS unsupported."); break;
    }
    inst->base_mrf = base_mrf;
    inst->mlen = mlen;
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index ec3fba182d8..b222005d1a9 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2148,6 +2148,7 @@ ir_to_mesa_visitor::visit(ir_texture *ir)
       dy = this->result;
       break;
    case ir_txf:
+   case ir_txs:
       assert(!"GLSL 1.30 features unsupported");
       break;
    }
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 4b3e00c4242..6f0d9fa3f8f 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2469,6 +2469,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
       ir->lod_info.grad.dPdy->accept(this);
       dy = this->result;
       break;
+   case ir_txs:
    case ir_txf: /* TODO: use TGSI_OPCODE_TXF here */
       assert(!"GLSL 1.30 features unsupported");
       break;

From 583b295bbf5777ce5ec0d57ce46269ceea4285c1 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 25 Feb 2011 15:14:22 -0800
Subject: [PATCH 519/600] texture_builtins.py: Add support for textureSize
 (txs).

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/glsl/builtins/tools/texture_builtins.py | 50 +++++++++++++++------
 1 file changed, 36 insertions(+), 14 deletions(-)

diff --git a/src/glsl/builtins/tools/texture_builtins.py b/src/glsl/builtins/tools/texture_builtins.py
index a4054caac98..7e569bf562b 100755
--- a/src/glsl/builtins/tools/texture_builtins.py
+++ b/src/glsl/builtins/tools/texture_builtins.py
@@ -44,6 +44,11 @@ def get_extra_dim(sampler_type, use_proj, unused_fields):
         extra_dim += 1
     return extra_dim
 
+def get_txs_dim(sampler_type):
+    if sampler_type.startswith("Cube"):
+        return 2
+    return get_coord_dim(sampler_type)
+
 def generate_sigs(g, tex_inst, sampler_type, variant = 0, unused_fields = 0):
     coord_dim = get_coord_dim(sampler_type)
     extra_dim = get_extra_dim(sampler_type, variant & Proj, unused_fields)
@@ -51,17 +56,20 @@ def generate_sigs(g, tex_inst, sampler_type, variant = 0, unused_fields = 0):
 
     if variant & Single:
         return_type = "float"
+    elif tex_inst == "txs":
+        return_type = vec_type("i", get_txs_dim(sampler_type))
     else:
         return_type = g + "vec4"
 
     # Print parameters
     print "   (signature", return_type
     print "     (parameters"
-    print "       (declare (in) " + g + "sampler" + sampler_type + " sampler)"
-    print "       (declare (in) " + vec_type("i" if tex_inst == "txf" else "", coord_dim + extra_dim) + " P)",
+    print "       (declare (in) " + g + "sampler" + sampler_type + " sampler)",
+    if tex_inst != "txs":
+        print "\n       (declare (in) " + vec_type("i" if tex_inst == "txf" else "", coord_dim + extra_dim) + " P)",
     if tex_inst == "txl":
         print "\n       (declare (in) float lod)",
-    elif tex_inst == "txf":
+    elif tex_inst == "txf" or tex_inst == "txs":
         print "\n       (declare (in) int lod)",
     elif tex_inst == "txd":
         grad_type = vec_type("", coord_dim)
@@ -75,18 +83,19 @@ def generate_sigs(g, tex_inst, sampler_type, variant = 0, unused_fields = 0):
 
     print ")\n     ((return (" + tex_inst, return_type, "(var_ref sampler)",
 
-    # Coordinate
-    if extra_dim > 0:
-        print "(swiz " + "xyzw"[:coord_dim] + " (var_ref P))",
-    else:
-        print "(var_ref P)",
+    if tex_inst != "txs":
+        # Coordinate
+        if extra_dim > 0:
+            print "(swiz " + "xyzw"[:coord_dim] + " (var_ref P))",
+        else:
+            print "(var_ref P)",
 
-    if variant & Offset:
-        print "(var_ref offset)",
-    else:
-        print "0",
+        if variant & Offset:
+            print "(var_ref offset)",
+        else:
+            print "0",
 
-    if tex_inst != "txf":
+    if tex_inst != "txf" and tex_inst != "txs":
         # Projective divisor
         if variant & Proj:
             print "(swiz " + "xyzw"[coord_dim + extra_dim-1] + " (var_ref P))",
@@ -104,7 +113,7 @@ def generate_sigs(g, tex_inst, sampler_type, variant = 0, unused_fields = 0):
     # Bias/explicit LOD/gradient:
     if tex_inst == "txb":
         print "(var_ref bias)",
-    elif tex_inst == "txl" or tex_inst == "txf":
+    elif tex_inst == "txl" or tex_inst == "txf" or tex_inst == "txs":
         print "(var_ref lod)",
     elif tex_inst == "txd":
         print "((var_ref dPdx) (var_ref dPdy))",
@@ -130,6 +139,19 @@ def end_function(fs, name):
 #
 # Takes a dictionary as an argument.
 def generate_texture_functions(fs):
+    start_function("textureSize")
+    generate_fiu_sigs("txs", "1D")
+    generate_fiu_sigs("txs", "2D")
+    generate_fiu_sigs("txs", "3D")
+    generate_fiu_sigs("txs", "Cube")
+    generate_fiu_sigs("txs", "1DArray")
+    generate_fiu_sigs("txs", "2DArray")
+    generate_sigs("", "txs", "1DShadow")
+    generate_sigs("", "txs", "2DShadow")
+    generate_sigs("", "txs", "1DArrayShadow")
+    generate_sigs("", "txs", "2DArrayShadow")
+    end_function(fs, "textureSize")
+
     start_function("texture")
     generate_fiu_sigs("tex", "1D")
     generate_fiu_sigs("tex", "2D")

From 2054652796d78996b1f9308ab1a75361d71e864e Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 25 Feb 2011 15:14:47 -0800
Subject: [PATCH 520/600] glsl/builtins: Uncomment textureSize prototypes.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
---
 src/glsl/builtins/profiles/130.frag | 2 --
 src/glsl/builtins/profiles/130.vert | 2 --
 2 files changed, 4 deletions(-)

diff --git a/src/glsl/builtins/profiles/130.frag b/src/glsl/builtins/profiles/130.frag
index 0e3c7ac4199..c121859f14c 100644
--- a/src/glsl/builtins/profiles/130.frag
+++ b/src/glsl/builtins/profiles/130.frag
@@ -465,7 +465,6 @@ bvec4 not(bvec4 x);
  * 8.7 - Texture Lookup Functions
  */
 
-#if 0
 /* textureSize */
 int   textureSize( sampler1D sampler, int lod);
 int   textureSize(isampler1D sampler, int lod);
@@ -496,7 +495,6 @@ ivec3 textureSize(usampler2DArray sampler, int lod);
 
 ivec2 textureSize(sampler1DArrayShadow sampler, int lod);
 ivec3 textureSize(sampler2DArrayShadow sampler, int lod);
-#endif
 
 /* texture - no bias */
  vec4 texture( sampler1D sampler, float P);
diff --git a/src/glsl/builtins/profiles/130.vert b/src/glsl/builtins/profiles/130.vert
index f85b27f8f8c..ebd9a508851 100644
--- a/src/glsl/builtins/profiles/130.vert
+++ b/src/glsl/builtins/profiles/130.vert
@@ -467,7 +467,6 @@ bvec4 not(bvec4 x);
  * 8.7 - Texture Lookup Functions
  */
 
-#if 0
 /* textureSize */
 int   textureSize( sampler1D sampler, int lod);
 int   textureSize(isampler1D sampler, int lod);
@@ -498,7 +497,6 @@ ivec3 textureSize(usampler2DArray sampler, int lod);
 
 ivec2 textureSize(sampler1DArrayShadow sampler, int lod);
 ivec3 textureSize(sampler2DArrayShadow sampler, int lod);
-#endif
 
 /* texture - no bias */
  vec4 texture( sampler1D sampler, float P);

From b6bdcf2a908889532ef6d5eb643791176dffcb9d Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 18 Aug 2011 00:18:15 -0700
Subject: [PATCH 521/600] i965/fs: Rudimentary support for non-floating point
 texture results.

Not all texturing operations return floating point data.  For example,
the resinfo message (textureSize or TXS) returns integer data.  In the
future, we'll also add integer texture support.

ir_texture's type field contains this information; use its base type to
appropriately type the destination register.  We want to keep it as a
four component vector, however, since SIMD8 samplers always have a
response length of 4.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 764351a34f9..792799d9063 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1075,7 +1075,7 @@ fs_visitor::visit(ir_texture *ir)
    /* Writemasking doesn't eliminate channels on SIMD8 texture
     * samples, so don't worry about them.
     */
-   fs_reg dst = fs_reg(this, glsl_type::vec4_type);
+   fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1));
 
    if (intel->gen >= 7) {
       inst = emit_texture_gen7(ir, dst, coordinate, sampler);

From ecf8963754489abfb5097c130a9bcd4cdb76b6bd Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Sun, 19 Jun 2011 01:47:50 -0700
Subject: [PATCH 522/600] i965/fs: Implement textureSize (TXS) on Gen5+.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/drivers/dri/i965/brw_defines.h      |  2 ++
 src/mesa/drivers/dri/i965/brw_fs.cpp         |  1 +
 src/mesa/drivers/dri/i965/brw_fs.h           |  3 ++-
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp    |  4 +++
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 28 +++++++++++++++-----
 src/mesa/program/ir_to_mesa.cpp              |  7 +++--
 6 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index b740d87c933..69e0026ee6b 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -634,6 +634,7 @@ enum opcode {
    FS_OPCODE_TXB,
    FS_OPCODE_TXD,
    FS_OPCODE_TXL,
+   FS_OPCODE_TXS,
    FS_OPCODE_DISCARD,
    FS_OPCODE_SPILL,
    FS_OPCODE_UNSPILL,
@@ -781,6 +782,7 @@ enum opcode {
 #define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS       4
 #define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5
 #define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE  6
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO      10
 
 /* for GEN5 only */
 #define BRW_SAMPLER_SIMD_MODE_SIMD4X2                   0
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index c8f74252654..0b0445ea142 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -157,6 +157,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
    case FS_OPCODE_TXB:
    case FS_OPCODE_TXD:
    case FS_OPCODE_TXL:
+   case FS_OPCODE_TXS:
       return 1;
    case FS_OPCODE_FB_WRITE:
       return 2;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 94af0e1af16..10f45f30fe9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -291,7 +291,8 @@ public:
       return (opcode == FS_OPCODE_TEX ||
 	      opcode == FS_OPCODE_TXB ||
 	      opcode == FS_OPCODE_TXD ||
-	      opcode == FS_OPCODE_TXL);
+	      opcode == FS_OPCODE_TXL ||
+	      opcode == FS_OPCODE_TXS);
    }
 
    bool is_math()
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 482d250c333..5c057e9a00b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -242,6 +242,9 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
 	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
 	 }
 	 break;
+      case FS_OPCODE_TXS:
+	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
+	 break;
       case FS_OPCODE_TXD:
 	 /* There is no sample_d_c message; comparisons are done manually */
 	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
@@ -775,6 +778,7 @@ fs_visitor::generate_code()
       case FS_OPCODE_TXB:
       case FS_OPCODE_TXD:
       case FS_OPCODE_TXL:
+      case FS_OPCODE_TXS:
 	 generate_tex(inst, dst, src[0]);
 	 break;
       case FS_OPCODE_DISCARD:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 792799d9063..3551e3dfe81 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -751,6 +751,8 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    int base_mrf = 2;
    int reg_width = c->dispatch_width / 8;
    bool header_present = false;
+   const int vector_elements =
+      ir->coordinate ? ir->coordinate->type->vector_elements : 0;
 
    if (ir->offset) {
       /* The offsets set up by the ir_texture visitor are in the
@@ -761,7 +763,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       base_mrf--;
    }
 
-   for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
+   for (int i = 0; i < vector_elements; i++) {
       fs_inst *inst = emit(BRW_OPCODE_MOV,
 			   fs_reg(MRF, base_mrf + mlen + i * reg_width),
 			   coordinate);
@@ -769,7 +771,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
 	 inst->saturate = true;
       coordinate.reg_offset++;
    }
-   mlen += ir->coordinate->type->vector_elements * reg_width;
+   mlen += vector_elements * reg_width;
 
    if (ir->shadow_comparitor && ir->op != ir_txd) {
       mlen = MAX2(mlen, header_present + 4 * reg_width);
@@ -837,8 +839,14 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       inst = emit(FS_OPCODE_TXD, dst);
       break;
    }
-   case ir_txf:
    case ir_txs:
+      this->result = reg_undef;
+      ir->lod_info.lod->accept(this);
+      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result);
+      mlen += reg_width;
+      inst = emit(FS_OPCODE_TXS, dst);
+      break;
+   case ir_txf:
       assert(!"GLSL 1.30 features unsupported");
       break;
    }
@@ -927,14 +935,19 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       }
       break;
    }
-   case ir_txf:
    case ir_txs:
+      this->result = reg_undef;
+      ir->lod_info.lod->accept(this);
+      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result);
+      mlen += reg_width;
+      break;
+   case ir_txf:
       assert(!"GLSL 1.30 features unsupported");
       break;
    }
 
    /* Set up the coordinate (except for TXD where it was done earlier) */
-   if (ir->op != ir_txd) {
+   if (ir->op != ir_txd && ir->op != ir_txs) {
       for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
 	 fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
 			      coordinate);
@@ -953,7 +966,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    case ir_txl: inst = emit(FS_OPCODE_TXL, dst); break;
    case ir_txd: inst = emit(FS_OPCODE_TXD, dst); break;
    case ir_txf: assert(!"TXF unsupported."); break;
-   case ir_txs: assert(!"TXS unsupported."); break;
+   case ir_txs: inst = emit(FS_OPCODE_TXS, dst); break;
    }
    inst->base_mrf = base_mrf;
    inst->mlen = mlen;
@@ -988,7 +1001,8 @@ fs_visitor::visit(ir_texture *ir)
    }
 
    this->result = reg_undef;
-   ir->coordinate->accept(this);
+   if (ir->coordinate)
+      ir->coordinate->accept(this);
    fs_reg coordinate = this->result;
 
    if (ir->offset != NULL) {
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index b222005d1a9..e7609df19ee 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2104,7 +2104,10 @@ ir_to_mesa_visitor::visit(ir_texture *ir)
    ir_to_mesa_instruction *inst = NULL;
    prog_opcode opcode = OPCODE_NOP;
 
-   ir->coordinate->accept(this);
+   if (ir->op == ir_txs)
+      this->result = src_reg_for_float(0.0);
+   else
+      ir->coordinate->accept(this);
 
    /* Put our coords in a temp.  We'll need to modify them for shadow,
     * projection, or LOD, so the only case we'd use it as is is if
@@ -2128,6 +2131,7 @@ ir_to_mesa_visitor::visit(ir_texture *ir)
 
    switch (ir->op) {
    case ir_tex:
+   case ir_txs:
       opcode = OPCODE_TEX;
       break;
    case ir_txb:
@@ -2148,7 +2152,6 @@ ir_to_mesa_visitor::visit(ir_texture *ir)
       dy = this->result;
       break;
    case ir_txf:
-   case ir_txs:
       assert(!"GLSL 1.30 features unsupported");
       break;
    }

From 4eeb4c150598605d1be3ce6674fa63076a720ae9 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Wed, 17 Aug 2011 10:45:47 -0700
Subject: [PATCH 523/600] i965: Implement textureSize (TXS) on Gen4.

Also, remove the BRW_SAMPLER_MESSAGE_SIMD8_RESINFO #define because
there totally isn't a SIMD8 variant.

Unfortunately, resinfo returns FLOAT32 on Broadwater/Crestline, unlike
G45 which returns a proper UINT32.  This turns out to be simple,
however: when we emit MOVs to select the desired half of the SIMD16
result, we can simply override the register type to be float so it's
converted to an integer.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/drivers/dri/i965/brw_defines.h      |  1 -
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp    |  5 +++++
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 23 +++++++++++++++-----
 3 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 69e0026ee6b..d1799c0ab4f 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -769,7 +769,6 @@ enum opcode {
 #define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
 #define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE  1
 #define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO           2
-#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO             2
 #define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO            2
 #define BRW_SAMPLER_MESSAGE_SIMD4X2_LD                3
 #define BRW_SAMPLER_MESSAGE_SIMD8_LD                  3
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 5c057e9a00b..28efbd3605f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -292,6 +292,11 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
 	 assert(inst->mlen == 7 || inst->mlen == 10);
 	 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
 	 break;
+      case FS_OPCODE_TXS:
+	 assert(inst->mlen == 3);
+	 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO;
+	 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+	 break;
       default:
 	 assert(!"not reached");
 	 break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 3551e3dfe81..cbc0af02407 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -657,10 +657,18 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
 	 dPdy.reg_offset++;
       }
       mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2);
+   } else if (ir->op == ir_txs) {
+      /* There's no SIMD8 resinfo message on Gen4.  Use SIMD16 instead. */
+      simd16 = true;
+      this->result = reg_undef;
+      ir->lod_info.lod->accept(this);
+      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result);
+      mlen += 2;
    } else {
       /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
        * instructions.  We'll need to do SIMD16 here.
        */
+      simd16 = true;
       assert(ir->op == ir_txb || ir->op == ir_txl);
 
       for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
@@ -689,16 +697,19 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
 
       /* The unused upper half. */
       mlen++;
+   }
 
+   if (simd16) {
       /* Now, since we're doing simd16, the return is 2 interleaved
        * vec4s where the odd-indexed ones are junk. We'll need to move
        * this weirdness around to the expected layout.
        */
-      simd16 = true;
       orig_dst = dst;
-      dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type,
-						       2));
-      dst.type = BRW_REGISTER_TYPE_F;
+      const glsl_type *vec_type =
+	 glsl_type::get_instance(ir->type->base_type, 4, 1);
+      dst = fs_reg(this, glsl_type::get_array_instance(vec_type, 2));
+      dst.type = intel->is_g4x ? brw_type_for_base_type(ir->type)
+			       : BRW_REGISTER_TYPE_F;
    }
 
    fs_inst *inst = NULL;
@@ -715,8 +726,10 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    case ir_txd:
       inst = emit(FS_OPCODE_TXD, dst);
       break;
-   case ir_txf:
    case ir_txs:
+      inst = emit(FS_OPCODE_TXS, dst);
+      break;
+   case ir_txf:
       assert(!"GLSL 1.30 features unsupported");
       break;
    }

From 9d4b98eb9eadecc17cd1cda0074b420a39e74647 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 17 Aug 2011 13:41:43 -0700
Subject: [PATCH 524/600] i965/gen6+: Use non-normalized coordinates for
 GL_TEXTURE_RECTANGLE.

Improves performance of a GL_TEXTURE_RECTANGLE microbenchmark by 1.84%
+/- .15% (n=3)
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp     | 3 ++-
 src/mesa/drivers/dri/i965/brw_wm_fp.c            | 4 +++-
 src/mesa/drivers/dri/i965/brw_wm_sampler_state.c | 7 +++++++
 src/mesa/drivers/dri/i965/gen7_sampler_state.c   | 7 +++++++
 4 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index cbc0af02407..cdaf543c88b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1056,7 +1056,8 @@ fs_visitor::visit(ir_texture *ir)
     * texture coordinates.  We use the program parameter state
     * tracking to get the scaling factor.
     */
-   if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
+   if (intel->gen < 6 &&
+       ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
       struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
       int tokens[STATE_LENGTH] = {
 	 STATE_INTERNAL,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index d52a9581f5e..bd46bd8de43 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -664,6 +664,8 @@ static void precalc_lit( struct brw_wm_compile *c,
 static void precalc_tex( struct brw_wm_compile *c,
 			 const struct prog_instruction *inst )
 {
+   struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
    struct prog_src_register coord;
    struct prog_dst_register tmpcoord = { 0 };
    const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
@@ -727,7 +729,7 @@ static void precalc_tex( struct brw_wm_compile *c,
        release_temp(c, tmp0);
        release_temp(c, tmp1);
    }
-   else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
+   else if (intel->gen < 6 && inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
       struct prog_src_register scale = 
 	 search_or_add_param5( c, 
 			       STATE_INTERNAL, 
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index 98146136703..6834ebad780 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -289,6 +289,13 @@ static void brw_update_sampler_state(struct brw_context *brw,
    sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 6);
    sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 6);
 
+   /* On Gen6+, the sampler can handle non-normalized texture
+    * rectangle coordinates natively
+    */
+   if (intel->gen >= 6 && texObj->Target == GL_TEXTURE_RECTANGLE) {
+      sampler->ss3.non_normalized_coord = 1;
+   }
+
    upload_default_color(brw, gl_sampler, unit);
 
    if (intel->gen >= 6) {
diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
index e787c21f4d1..aee67c87472 100644
--- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
@@ -157,6 +157,13 @@ gen7_update_sampler_state(struct brw_context *brw, int unit,
    sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 8);
    sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 8);
 
+   /* The sampler can handle non-normalized texture rectangle coordinates
+    * natively
+    */
+   if (texObj->Target == GL_TEXTURE_RECTANGLE) {
+      sampler->ss3.non_normalized_coord = 1;
+   }
+
    upload_default_color(brw, gl_sampler, unit);
 
    sampler->ss2.default_color_pointer = brw->wm.sdc_offset[unit] >> 5;

From abbb8fc3a7d49066ecca10cb9db0b4756a1bbef0 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 23 Aug 2011 10:51:16 -0700
Subject: [PATCH 525/600] i965: Fix typo in
 2b224d66a01f3ce867fb05558b25749705bbfe7a

Unfortunately, since a previous efficiency improvement, we no longer
have any open-source testcases producing register spilling, so this
code was untested in the fragment shader path.  That should change
when we get proper temporary array support in the fragment shader.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=40194
---
 src/mesa/drivers/dri/i965/brw_wm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index a4524fc7889..e76832515fe 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -246,7 +246,7 @@ bool do_wm_prog(struct brw_context *brw,
    if (c->last_scratch) {
       c->prog_data.total_scratch = brw_get_scratch_size(c->last_scratch);
 
-      brw_get_scratch_bo(intel, &brw->vs.scratch_bo,
+      brw_get_scratch_bo(intel, &brw->wm.scratch_bo,
 			 c->prog_data.total_scratch * brw->wm_max_threads);
    }
 

From 0457655035cda3678208cd2850fa75a364a59ca9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= <jfonseca@vmware.com>
Date: Tue, 23 Aug 2011 19:49:43 +0100
Subject: [PATCH 526/600] make: Add missing source file.

---
 src/gallium/auxiliary/Makefile.sources | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
index 2b1ff5e5e86..07cc3156f07 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -88,6 +88,7 @@ C_SOURCES := \
 	translate/translate_sse.c \
 	util/u_debug.c \
 	util/u_debug_describe.c \
+	util/u_debug_memory.c \
 	util/u_debug_refcnt.c \
 	util/u_debug_stack.c \
 	util/u_debug_symbol.c \

From 56f0c00f125ee75caeadc1c9e8cab8a488635e5e Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 21 Aug 2011 16:59:30 -0700
Subject: [PATCH 527/600] mesa: Remove target parameter from
 dd_function_table::UnmapBuffer

No driver used that parameter, and most drivers ended up with a bunch
of unused-parameter warnings because it was there.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i965/brw_draw_upload.c        |  2 +-
 src/mesa/drivers/dri/intel/intel_buffer_objects.c  | 14 ++++++--------
 src/mesa/drivers/dri/intel/intel_pixel_bitmap.c    |  3 +--
 src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c   |  2 +-
 src/mesa/drivers/dri/r300/r300_draw.c              |  8 ++++----
 src/mesa/drivers/dri/r600/evergreen_render.c       |  8 ++++----
 src/mesa/drivers/dri/r600/r700_render.c            |  8 ++++----
 .../drivers/dri/radeon/radeon_buffer_objects.c     |  1 -
 src/mesa/drivers/x11/xm_dd.c                       |  6 ++----
 src/mesa/main/api_arrayelt.c                       |  4 +---
 src/mesa/main/api_validate.c                       |  2 +-
 src/mesa/main/bufferobj.c                          | 14 ++++++--------
 src/mesa/main/dd.h                                 |  2 +-
 src/mesa/main/dlist.c                              |  3 +--
 src/mesa/main/pbo.c                                |  8 +++-----
 src/mesa/main/shared.c                             |  2 +-
 src/mesa/main/texgetimage.c                        |  6 ++----
 src/mesa/state_tracker/st_cb_bufferobjects.c       |  2 +-
 src/mesa/tnl/t_draw.c                              |  4 +---
 src/mesa/vbo/vbo_exec_api.c                        |  2 +-
 src/mesa/vbo/vbo_exec_array.c                      | 12 +++++-------
 src/mesa/vbo/vbo_exec_draw.c                       |  2 +-
 src/mesa/vbo/vbo_rebase.c                          |  4 +---
 src/mesa/vbo/vbo_save_api.c                        |  2 +-
 src/mesa/vbo/vbo_save_draw.c                       |  3 +--
 src/mesa/vbo/vbo_split_copy.c                      |  4 ++--
 26 files changed, 53 insertions(+), 75 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 56a46ced6e3..2049850417b 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -699,7 +699,7 @@ static void brw_prepare_indices(struct brw_context *brw)
 			     &bo, &offset);
 	   brw->ib.start_vertex_offset = offset / ib_type_size;
 
-           ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj);
+           ctx->Driver.UnmapBuffer(ctx, bufferobj);
        } else {
 	  /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
 	   * the index buffer state when we're just moving the start index
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 703300b31af..57609fd5d32 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -41,8 +41,7 @@
 #include "intel_regions.h"
 
 static GLboolean
-intel_bufferobj_unmap(struct gl_context * ctx,
-                      GLenum target, struct gl_buffer_object *obj);
+intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj);
 
 /** Allocates a new drm_intel_bo to store the data for the buffer object. */
 static void
@@ -122,7 +121,7 @@ intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj)
     * (though it does if you call glDeleteBuffers)
     */
    if (obj->Pointer)
-      intel_bufferobj_unmap(ctx, 0, obj);
+      intel_bufferobj_unmap(ctx, obj);
 
    free(intel_obj->sys_buffer);
    if (intel_obj->region) {
@@ -507,8 +506,7 @@ intel_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target,
  * Called via glUnmapBuffer().
  */
 static GLboolean
-intel_bufferobj_unmap(struct gl_context * ctx,
-                      GLenum target, struct gl_buffer_object *obj)
+intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj)
 {
    struct intel_context *intel = intel_context(ctx);
    struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
@@ -766,7 +764,7 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx,
 	 char *ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER,
 					 GL_READ_WRITE, dst);
 	 memmove(ptr + write_offset, ptr + read_offset, size);
-	 intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst);
+	 intel_bufferobj_unmap(ctx, dst);
       } else {
 	 const char *src_ptr;
 	 char *dst_ptr;
@@ -778,8 +776,8 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx,
 
 	 memcpy(dst_ptr + write_offset, src_ptr + read_offset, size);
 
-	 intel_bufferobj_unmap(ctx, GL_COPY_READ_BUFFER, src);
-	 intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst);
+	 intel_bufferobj_unmap(ctx, src);
+	 intel_bufferobj_unmap(ctx, dst);
       }
       return;
    }
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
index 86d0ef2d748..1727d4c1a91 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -292,8 +292,7 @@ out:
 
    if (_mesa_is_bufferobj(unpack->BufferObj)) {
       /* done with PBO so unmap it now */
-      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                              unpack->BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
    }
 
    intel_check_front_buffer_rendering(intel);
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
index e60b91f64be..c0ab31b0b11 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
@@ -169,7 +169,7 @@ nouveau_bufferobj_map_range(struct gl_context *ctx, GLenum target, GLintptr offs
 }
 
 static GLboolean
-nouveau_bufferobj_unmap(struct gl_context *ctx, GLenum target, struct gl_buffer_object *obj)
+nouveau_bufferobj_unmap(struct gl_context *ctx, struct gl_buffer_object *obj)
 {
 	assert(obj->Pointer);
 
diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c
index 0c4d8537c61..c47e15534ca 100644
--- a/src/mesa/drivers/dri/r300/r300_draw.c
+++ b/src/mesa/drivers/dri/r300/r300_draw.c
@@ -138,7 +138,7 @@ static void r300FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 	r300->ind_buf.count = mesa_ind_buf->count;
 
 	if (mapped_named_bo) {
-		ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+		ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
 	}
 }
 
@@ -184,7 +184,7 @@ static void r300SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 		r300->ind_buf.count = mesa_ind_buf->count;
 
 		if (mapped_named_bo) {
-			ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+			ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
 		}
 	} else {
 		r300FixupIndexBuffer(ctx, mesa_ind_buf);
@@ -286,7 +286,7 @@ static void r300ConvertAttrib(struct gl_context *ctx, int count, const struct gl
 
 	radeon_bo_unmap(attr->bo);
 	if (mapped_named_bo) {
-		ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+		ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
 	}
 }
 
@@ -321,7 +321,7 @@ static void r300AlignDataToDword(struct gl_context *ctx, const struct gl_client_
 	}
 
 	if (mapped_named_bo) {
-		ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+		ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
 	}
 
 	radeon_bo_unmap(attr->bo);
diff --git a/src/mesa/drivers/dri/r600/evergreen_render.c b/src/mesa/drivers/dri/r600/evergreen_render.c
index 4507be29d86..9a310eec6bc 100644
--- a/src/mesa/drivers/dri/r600/evergreen_render.c
+++ b/src/mesa/drivers/dri/r600/evergreen_render.c
@@ -456,7 +456,7 @@ static void evergreenConvertAttrib(struct gl_context *ctx, int count,
 
     if (mapped_named_bo) 
     {
-        ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+        ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
     }
 }
 
@@ -531,7 +531,7 @@ static void evergreenFixupIndexBuffer(struct gl_context *ctx, const struct _mesa
 
     if (mapped_named_bo)
     {
-        ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+        ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
     }
 }
 
@@ -629,7 +629,7 @@ static void evergreenSetupIndexBuffer(struct gl_context *ctx, const struct _mesa
 
         if (mapped_named_bo)
         {
-	        ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+	        ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
         }
     }
     else
@@ -675,7 +675,7 @@ static void evergreenAlignDataToDword(struct gl_context *ctx,
     radeon_bo_unmap(attr->bo);
     if (mapped_named_bo) 
     {
-        ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+        ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
     }
 
     attr->stride = dst_stride;
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index 0f7a7a46b71..2300fe6d33f 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -543,7 +543,7 @@ static void r700ConvertAttrib(struct gl_context *ctx, int count,
 
     if (mapped_named_bo) 
     {
-        ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+        ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
     }
 }
 
@@ -584,7 +584,7 @@ static void r700AlignDataToDword(struct gl_context *ctx,
     radeon_bo_unmap(attr->bo);
     if (mapped_named_bo) 
     {
-        ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+        ctx->Driver.UnmapBuffer(ctx, input->BufferObj);
     }
 
     attr->stride = dst_stride;
@@ -788,7 +788,7 @@ static void r700FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 
     if (mapped_named_bo)
     {
-        ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+        ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
     }
 }
 
@@ -836,7 +836,7 @@ static void r700SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 
         if (mapped_named_bo)
         {
-	        ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+	        ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj);
         }
     }
     else
diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
index 0d1af726c07..eac1277a719 100644
--- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
+++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
@@ -205,7 +205,6 @@ radeonMapBuffer(struct gl_context * ctx,
  */
 static GLboolean
 radeonUnmapBuffer(struct gl_context * ctx,
-                  GLenum target,
                   struct gl_buffer_object *obj)
 {
     struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c
index 81f48f9d95a..3319d118142 100644
--- a/src/mesa/drivers/x11/xm_dd.c
+++ b/src/mesa/drivers/x11/xm_dd.c
@@ -508,8 +508,7 @@ xmesa_DrawPixels_8R8G8B( struct gl_context *ctx,
       }
 
       if (_mesa_is_bufferobj(unpack->BufferObj)) {
-         ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                                 unpack->BufferObj);
+         ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
       }
    }
    else {
@@ -642,8 +641,7 @@ xmesa_DrawPixels_5R6G5B( struct gl_context *ctx,
       }
 
       if (unpack->BufferObj->Name) {
-         ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                                 unpack->BufferObj);
+         ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
       }
    }
    else {
diff --git a/src/mesa/main/api_arrayelt.c b/src/mesa/main/api_arrayelt.c
index f88da845853..385bef1c53e 100644
--- a/src/mesa/main/api_arrayelt.c
+++ b/src/mesa/main/api_arrayelt.c
@@ -1622,9 +1622,7 @@ void _ae_unmap_vbos( struct gl_context *ctx )
    assert (!actx->NewState);
 
    for (i = 0; i < actx->nr_vbos; i++)
-      ctx->Driver.UnmapBuffer(ctx,
-			      GL_ARRAY_BUFFER_ARB,
-			      actx->vbo[i]);
+      ctx->Driver.UnmapBuffer(ctx, actx->vbo[i]);
 
    actx->mapped_vbos = GL_FALSE;
 }
diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index 2981d42297a..08faf9e08b4 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -89,7 +89,7 @@ _mesa_max_buffer_index(struct gl_context *ctx, GLuint count, GLenum type,
    }
 
    if (map) {
-      ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, elementBuf);
+      ctx->Driver.UnmapBuffer(ctx, elementBuf);
    }
 
    return max;
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index c52358ecb04..e52e59eb5c2 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -512,11 +512,9 @@ _mesa_buffer_flush_mapped_range( struct gl_context *ctx, GLenum target,
  * \sa glUnmapBufferARB, dd_function_table::UnmapBuffer
  */
 static GLboolean
-_mesa_buffer_unmap( struct gl_context *ctx, GLenum target,
-                    struct gl_buffer_object *bufObj )
+_mesa_buffer_unmap( struct gl_context *ctx, struct gl_buffer_object *bufObj )
 {
    (void) ctx;
-   (void) target;
    /* XXX we might assert here that bufObj->Pointer is non-null */
    bufObj->Pointer = NULL;
    bufObj->Length = 0;
@@ -551,8 +549,8 @@ _mesa_copy_buffer_subdata(struct gl_context *ctx,
    if (srcPtr && dstPtr)
       memcpy(dstPtr + writeOffset, srcPtr + readOffset, size);
 
-   ctx->Driver.UnmapBuffer(ctx, GL_COPY_READ_BUFFER, src);
-   ctx->Driver.UnmapBuffer(ctx, GL_COPY_WRITE_BUFFER, dst);
+   ctx->Driver.UnmapBuffer(ctx, src);
+   ctx->Driver.UnmapBuffer(ctx, dst);
 }
 
 
@@ -774,7 +772,7 @@ _mesa_DeleteBuffersARB(GLsizei n, const GLuint *ids)
 
          if (_mesa_bufferobj_mapped(bufObj)) {
             /* if mapped, unmap it now */
-            ctx->Driver.UnmapBuffer(ctx, 0, bufObj);
+            ctx->Driver.UnmapBuffer(ctx, bufObj);
             bufObj->AccessFlags = DEFAULT_ACCESS;
             bufObj->Pointer = NULL;
          }
@@ -934,7 +932,7 @@ _mesa_BufferDataARB(GLenum target, GLsizeiptrARB size,
    
    if (_mesa_bufferobj_mapped(bufObj)) {
       /* Unmap the existing buffer.  We'll replace it now.  Not an error. */
-      ctx->Driver.UnmapBuffer(ctx, target, bufObj);
+      ctx->Driver.UnmapBuffer(ctx, bufObj);
       bufObj->AccessFlags = DEFAULT_ACCESS;
       ASSERT(bufObj->Pointer == NULL);
    }  
@@ -1147,7 +1145,7 @@ _mesa_UnmapBufferARB(GLenum target)
    }
 #endif
 
-   status = ctx->Driver.UnmapBuffer( ctx, target, bufObj );
+   status = ctx->Driver.UnmapBuffer( ctx, bufObj );
    bufObj->AccessFlags = DEFAULT_ACCESS;
    ASSERT(bufObj->Pointer == NULL);
    ASSERT(bufObj->Offset == 0);
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index b5ed9a40c70..318ea1f25aa 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -725,7 +725,7 @@ struct dd_function_table {
                                   GLintptr offset, GLsizeiptr length,
                                   struct gl_buffer_object *obj);
 
-   GLboolean (*UnmapBuffer)( struct gl_context *ctx, GLenum target,
+   GLboolean (*UnmapBuffer)( struct gl_context *ctx,
 			     struct gl_buffer_object *obj );
    /*@}*/
 
diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index f9282398c21..3e54af25d00 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -906,8 +906,7 @@ unpack_image(struct gl_context *ctx, GLuint dimensions,
       image = _mesa_unpack_image(dimensions, width, height, depth,
                                  format, type, src, unpack);
 
-      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                              unpack->BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
 
       if (!image) {
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "display list construction");
diff --git a/src/mesa/main/pbo.c b/src/mesa/main/pbo.c
index 15e0480e9f1..f93cdf1e392 100644
--- a/src/mesa/main/pbo.c
+++ b/src/mesa/main/pbo.c
@@ -201,8 +201,7 @@ _mesa_unmap_pbo_source(struct gl_context *ctx,
 {
    ASSERT(unpack != &ctx->Pack); /* catch pack/unpack mismatch */
    if (_mesa_is_bufferobj(unpack->BufferObj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                              unpack->BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
    }
 }
 
@@ -297,7 +296,7 @@ _mesa_unmap_pbo_dest(struct gl_context *ctx,
 {
    ASSERT(pack != &ctx->Unpack); /* catch pack/unpack mismatch */
    if (_mesa_is_bufferobj(pack->BufferObj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT, pack->BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, pack->BufferObj);
    }
 }
 
@@ -384,8 +383,7 @@ _mesa_unmap_teximage_pbo(struct gl_context *ctx,
                          const struct gl_pixelstore_attrib *unpack)
 {
    if (_mesa_is_bufferobj(unpack->BufferObj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                              unpack->BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
    }
 }
 
diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c
index d84f59690c5..8b7159db09c 100644
--- a/src/mesa/main/shared.c
+++ b/src/mesa/main/shared.c
@@ -200,7 +200,7 @@ delete_bufferobj_cb(GLuint id, void *data, void *userData)
    struct gl_buffer_object *bufObj = (struct gl_buffer_object *) data;
    struct gl_context *ctx = (struct gl_context *) userData;
    if (_mesa_bufferobj_mapped(bufObj)) {
-      ctx->Driver.UnmapBuffer(ctx, 0, bufObj);
+      ctx->Driver.UnmapBuffer(ctx, bufObj);
       bufObj->Pointer = NULL;
    }
    _mesa_reference_buffer_object(ctx, &bufObj, NULL);
diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index 26c2ff98ba1..20595ef3b56 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -474,8 +474,7 @@ _mesa_get_teximage(struct gl_context *ctx, GLenum target, GLint level,
    }
 
    if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
-                              ctx->Pack.BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, ctx->Pack.BufferObj);
    }
 }
 
@@ -531,8 +530,7 @@ _mesa_get_compressed_teximage(struct gl_context *ctx, GLenum target, GLint level
    }
 
    if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
-                              ctx->Pack.BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, ctx->Pack.BufferObj);
    }
 }
 
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c
index 7374bb0acc5..732bbaabd4a 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -378,7 +378,7 @@ st_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target,
  * Called via glUnmapBufferARB().
  */
 static GLboolean
-st_bufferobj_unmap(struct gl_context *ctx, GLenum target, struct gl_buffer_object *obj)
+st_bufferobj_unmap(struct gl_context *ctx, struct gl_buffer_object *obj)
 {
    struct pipe_context *pipe = st_context(ctx)->pipe;
    struct st_buffer_object *st_obj = st_buffer_object(obj);
diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c
index b1967e65417..19d4f203c35 100644
--- a/src/mesa/tnl/t_draw.c
+++ b/src/mesa/tnl/t_draw.c
@@ -402,9 +402,7 @@ static void unmap_vbos( struct gl_context *ctx,
 {
    GLuint i;
    for (i = 0; i < nr_bo; i++) { 
-      ctx->Driver.UnmapBuffer(ctx, 
-			      0, /* target -- I don't see why this would be needed */
-			      bo[i]);
+      ctx->Driver.UnmapBuffer(ctx, bo[i]);
    }
 }
 
diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
index af66dbd44d4..8474c787a46 100644
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -947,7 +947,7 @@ void vbo_exec_vtx_destroy( struct vbo_exec_context *exec )
    /* Free the vertex buffer.  Unmap first if needed.
     */
    if (_mesa_bufferobj_mapped(exec->vtx.bufferobj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, exec->vtx.bufferobj);
+      ctx->Driver.UnmapBuffer(ctx, exec->vtx.bufferobj);
    }
    _mesa_reference_buffer_object(ctx, &exec->vtx.bufferobj, NULL);
 }
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 32ce0e4a8ff..2692b7539a4 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -176,7 +176,7 @@ vbo_get_minmax_index(struct gl_context *ctx,
    }
 
    if (_mesa_is_bufferobj(ib->obj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, ib->obj);
+      ctx->Driver.UnmapBuffer(ctx, ib->obj);
    }
 }
 
@@ -238,7 +238,7 @@ unmap_array_buffer(struct gl_context *ctx, struct gl_client_array *array)
    if (array->Enabled &&
        _mesa_is_bufferobj(array->BufferObj) &&
        _mesa_bufferobj_mapped(array->BufferObj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, array->BufferObj);
+      ctx->Driver.UnmapBuffer(ctx, array->BufferObj);
    }
 }
 
@@ -296,8 +296,7 @@ check_draw_elements_data(struct gl_context *ctx, GLsizei count, GLenum elemType,
    }
 
    if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB,
-			      ctx->Array.ElementArrayBufferObj);
+      ctx->Driver.UnmapBuffer(ctx, ctx->Array.ElementArrayBufferObj);
    }
 
    unmap_array_buffer(ctx, &arrayObj->Vertex);
@@ -364,7 +363,7 @@ print_draw_arrays(struct gl_context *ctx,
          for (i = 0; i < n; i++) {
             printf("    float[%d] = 0x%08x %f\n", i, k[i], f[i]);
          }
-         ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, bufObj);
+         ctx->Driver.UnmapBuffer(ctx, bufObj);
       }
    }
 }
@@ -760,8 +759,7 @@ dump_element_buffer(struct gl_context *ctx, GLenum type)
       ;
    }
 
-   ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB,
-                           ctx->Array.ElementArrayBufferObj);
+   ctx->Driver.UnmapBuffer(ctx, ctx->Array.ElementArrayBufferObj);
 }
 
 
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 7e8d8602093..5366b989df2 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -281,7 +281,7 @@ vbo_exec_vtx_unmap( struct vbo_exec_context *exec )
       assert(exec->vtx.buffer_used <= VBO_VERT_BUFFER_SIZE);
       assert(exec->vtx.buffer_ptr != NULL);
       
-      ctx->Driver.UnmapBuffer(ctx, target, exec->vtx.bufferobj);
+      ctx->Driver.UnmapBuffer(ctx, exec->vtx.bufferobj);
       exec->vtx.buffer_map = NULL;
       exec->vtx.buffer_ptr = NULL;
       exec->vtx.max_vert = 0;
diff --git a/src/mesa/vbo/vbo_rebase.c b/src/mesa/vbo/vbo_rebase.c
index 1de290ff602..09ba0a6e9ac 100644
--- a/src/mesa/vbo/vbo_rebase.c
+++ b/src/mesa/vbo/vbo_rebase.c
@@ -183,9 +183,7 @@ void vbo_rebase_prims( struct gl_context *ctx,
       }      
 
       if (map_ib) 
-	 ctx->Driver.UnmapBuffer(ctx, 
-				 GL_ELEMENT_ARRAY_BUFFER,
-				 ib->obj);
+	 ctx->Driver.UnmapBuffer(ctx, ib->obj);
 
       tmp_ib.obj = ctx->Shared->NullBufferObj;
       tmp_ib.ptr = tmp_indices;
diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
index 9041f791edd..00f34ad1ec0 100644
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -247,7 +247,7 @@ static void
 unmap_vertex_store(struct gl_context *ctx,
                    struct vbo_save_vertex_store *vertex_store)
 {
-   ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, vertex_store->bufferobj);
+   ctx->Driver.UnmapBuffer(ctx, vertex_store->bufferobj);
    vertex_store->buffer = NULL;
 }
 
diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
index a37af73e0db..0d505555789 100644
--- a/src/mesa/vbo/vbo_save_draw.c
+++ b/src/mesa/vbo/vbo_save_draw.c
@@ -230,8 +230,7 @@ vbo_save_loopback_vertex_list(struct gl_context *ctx,
                             list->wrap_count,
                             list->vertex_size);
 
-   ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, 
-			   list->vertex_store->bufferobj);
+   ctx->Driver.UnmapBuffer(ctx, list->vertex_store->bufferobj);
 }
 
 
diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c
index ecca1171673..49c7435ab5e 100644
--- a/src/mesa/vbo/vbo_split_copy.c
+++ b/src/mesa/vbo/vbo_split_copy.c
@@ -564,14 +564,14 @@ replay_finish( struct copy_context *copy )
    for (i = 0; i < copy->nr_varying; i++) {
       struct gl_buffer_object *vbo = copy->varying[i].array->BufferObj;
       if (_mesa_is_bufferobj(vbo) && _mesa_bufferobj_mapped(vbo)) 
-	 ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, vbo);
+	 ctx->Driver.UnmapBuffer(ctx, vbo);
    }
 
    /* Unmap index buffer:
     */
    if (_mesa_is_bufferobj(copy->ib->obj) &&
        _mesa_bufferobj_mapped(copy->ib->obj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, copy->ib->obj);
+      ctx->Driver.UnmapBuffer(ctx, copy->ib->obj);
    }
 }
 

From 12d924c5ae14a1c6a05a3dcae29b77e7668e227d Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 21 Aug 2011 17:07:56 -0700
Subject: [PATCH 528/600] mesa: Remove target parameter from
 dd_function_table::MapBuffer

No driver used that parameter, and most drivers ended up with a bunch
of unused-parameter warnings because it was there.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i965/brw_draw_upload.c         |  1 -
 src/mesa/drivers/dri/intel/intel_buffer_objects.c   | 10 +++-------
 src/mesa/drivers/dri/intel/intel_pixel_bitmap.c     |  2 +-
 src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c    |  4 ++--
 src/mesa/drivers/dri/r300/r300_draw.c               |  8 ++++----
 src/mesa/drivers/dri/r600/evergreen_render.c        |  8 ++++----
 src/mesa/drivers/dri/r600/r700_render.c             |  8 ++++----
 src/mesa/drivers/dri/radeon/radeon_buffer_objects.c |  1 -
 src/mesa/drivers/x11/xm_dd.c                        |  1 -
 src/mesa/main/api_arrayelt.c                        |  1 -
 src/mesa/main/api_validate.c                        |  3 +--
 src/mesa/main/bufferobj.c                           | 11 ++++-------
 src/mesa/main/dd.h                                  |  2 +-
 src/mesa/main/dlist.c                               |  3 +--
 src/mesa/main/pbo.c                                 | 11 +++++------
 src/mesa/main/texgetimage.c                         |  6 ++----
 src/mesa/state_tracker/st_cb_bufferobjects.c        |  2 +-
 src/mesa/tnl/t_draw.c                               |  2 --
 src/mesa/vbo/vbo_exec_array.c                       | 11 +++--------
 src/mesa/vbo/vbo_exec_draw.c                        |  2 +-
 src/mesa/vbo/vbo_rebase.c                           |  5 +----
 src/mesa/vbo/vbo_save_api.c                         |  1 -
 src/mesa/vbo/vbo_save_draw.c                        |  1 -
 src/mesa/vbo/vbo_split_copy.c                       |  5 ++---
 24 files changed, 40 insertions(+), 69 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 2049850417b..66c42aa0779 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -690,7 +690,6 @@ static void brw_prepare_indices(struct brw_context *brw)
        */
        if ((get_size(index_buffer->type) - 1) & offset) {
            GLubyte *map = ctx->Driver.MapBuffer(ctx,
-                                                GL_ELEMENT_ARRAY_BUFFER_ARB,
                                                 GL_DYNAMIC_DRAW_ARB,
                                                 bufferobj);
            map += offset;
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 57609fd5d32..6f3a90942f6 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -301,7 +301,6 @@ intel_bufferobj_get_subdata(struct gl_context * ctx,
  */
 static void *
 intel_bufferobj_map(struct gl_context * ctx,
-                    GLenum target,
                     GLenum access, struct gl_buffer_object *obj)
 {
    struct intel_context *intel = intel_context(ctx);
@@ -761,18 +760,15 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx,
        * not overlap.
        */
       if (src == dst) {
-	 char *ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER,
-					 GL_READ_WRITE, dst);
+	 char *ptr = intel_bufferobj_map(ctx, GL_READ_WRITE, dst);
 	 memmove(ptr + write_offset, ptr + read_offset, size);
 	 intel_bufferobj_unmap(ctx, dst);
       } else {
 	 const char *src_ptr;
 	 char *dst_ptr;
 
-	 src_ptr =  intel_bufferobj_map(ctx, GL_COPY_READ_BUFFER,
-					GL_READ_ONLY, src);
-	 dst_ptr =  intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER,
-					GL_WRITE_ONLY, dst);
+	 src_ptr =  intel_bufferobj_map(ctx, GL_READ_ONLY, src);
+	 dst_ptr =  intel_bufferobj_map(ctx, GL_WRITE_ONLY, dst);
 
 	 memcpy(dst_ptr + write_offset, src_ptr + read_offset, size);
 
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
index 1727d4c1a91..44d79534ffd 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -74,7 +74,7 @@ static const GLubyte *map_pbo( struct gl_context *ctx,
       return NULL;
    }
 
-   buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
+   buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
 					   GL_READ_ONLY_ARB,
 					   unpack->BufferObj);
    if (!buf) {
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
index c0ab31b0b11..9db39491515 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
@@ -123,7 +123,7 @@ nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLenum target, GLintptrARB
 }
 
 static void *
-nouveau_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access,
+nouveau_bufferobj_map(struct gl_context *ctx, GLenum access,
 		   struct gl_buffer_object *obj)
 {
 	unsigned flags = 0;
@@ -135,7 +135,7 @@ nouveau_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access,
 	    access == GL_READ_WRITE_ARB)
 		flags |= GL_MAP_WRITE_BIT;
 
-	return ctx->Driver.MapBufferRange(ctx, target, 0, obj->Size, flags,
+	return ctx->Driver.MapBufferRange(ctx, 0, 0, obj->Size, flags,
 					  obj);
 }
 
diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c
index c47e15534ca..ba37923736c 100644
--- a/src/mesa/drivers/dri/r300/r300_draw.c
+++ b/src/mesa/drivers/dri/r300/r300_draw.c
@@ -84,7 +84,7 @@ static void r300FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 	GLboolean mapped_named_bo = GL_FALSE;
 
 	if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
-		ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+		ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
 		mapped_named_bo = GL_TRUE;
 		assert(mesa_ind_buf->obj->Pointer != NULL);
 	}
@@ -163,7 +163,7 @@ static void r300SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 		GLboolean mapped_named_bo = GL_FALSE;
 
 		if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
-			ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+			ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
 			assert(mesa_ind_buf->obj->Pointer != NULL);
 			mapped_named_bo = GL_TRUE;
 		}
@@ -235,7 +235,7 @@ static void r300ConvertAttrib(struct gl_context *ctx, int count, const struct gl
 
 	if (input->BufferObj->Name) {
 		if (!input->BufferObj->Pointer) {
-			ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+			ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
 			mapped_named_bo = GL_TRUE;
 		}
 
@@ -302,7 +302,7 @@ static void r300AlignDataToDword(struct gl_context *ctx, const struct gl_client_
 	radeon_bo_map(attr->bo, 1);
 
 	if (!input->BufferObj->Pointer) {
-		ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+		ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
 		mapped_named_bo = GL_TRUE;
 	}
 
diff --git a/src/mesa/drivers/dri/r600/evergreen_render.c b/src/mesa/drivers/dri/r600/evergreen_render.c
index 9a310eec6bc..e82f4d445f5 100644
--- a/src/mesa/drivers/dri/r600/evergreen_render.c
+++ b/src/mesa/drivers/dri/r600/evergreen_render.c
@@ -403,7 +403,7 @@ static void evergreenConvertAttrib(struct gl_context *ctx, int count,
     {
         if (!input->BufferObj->Pointer) 
         {
-            ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+            ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
             mapped_named_bo = GL_TRUE;
         }
 
@@ -470,7 +470,7 @@ static void evergreenFixupIndexBuffer(struct gl_context *ctx, const struct _mesa
 
     if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
     {
-        ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
         mapped_named_bo = GL_TRUE;
         assert(mesa_ind_buf->obj->Pointer != NULL);
     }
@@ -606,7 +606,7 @@ static void evergreenSetupIndexBuffer(struct gl_context *ctx, const struct _mesa
 
         if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
         {
-	        ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+	        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
 	        assert(mesa_ind_buf->obj->Pointer != NULL);
 	        mapped_named_bo = GL_TRUE;
         }
@@ -655,7 +655,7 @@ static void evergreenAlignDataToDword(struct gl_context *ctx,
 
     if (!input->BufferObj->Pointer) 
     {
-        ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
         mapped_named_bo = GL_TRUE;
     }
 
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index 2300fe6d33f..52a6f7cc45e 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -490,7 +490,7 @@ static void r700ConvertAttrib(struct gl_context *ctx, int count,
     {
         if (!input->BufferObj->Pointer) 
         {
-            ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+            ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
             mapped_named_bo = GL_TRUE;
         }
 
@@ -564,7 +564,7 @@ static void r700AlignDataToDword(struct gl_context *ctx,
 
     if (!input->BufferObj->Pointer) 
     {
-        ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
         mapped_named_bo = GL_TRUE;
     }
 
@@ -727,7 +727,7 @@ static void r700FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 
     if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
     {
-        ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
         mapped_named_bo = GL_TRUE;
         assert(mesa_ind_buf->obj->Pointer != NULL);
     }
@@ -813,7 +813,7 @@ static void r700SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 
         if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
         {
-	        ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+	        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
 	        assert(mesa_ind_buf->obj->Pointer != NULL);
 	        mapped_named_bo = GL_TRUE;
         }
diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
index eac1277a719..e645723299f 100644
--- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
+++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
@@ -175,7 +175,6 @@ radeonGetBufferSubData(struct gl_context * ctx,
  */
 static void *
 radeonMapBuffer(struct gl_context * ctx,
-                GLenum target,
                 GLenum access,
                 struct gl_buffer_object *obj)
 {
diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c
index 3319d118142..fe00bdd520d 100644
--- a/src/mesa/drivers/x11/xm_dd.c
+++ b/src/mesa/drivers/x11/xm_dd.c
@@ -455,7 +455,6 @@ xmesa_DrawPixels_8R8G8B( struct gl_context *ctx,
             return;
          }
          buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
-                                                 GL_PIXEL_UNPACK_BUFFER_EXT,
                                                  GL_READ_ONLY_ARB,
                                                  unpack->BufferObj);
          if (!buf) {
diff --git a/src/mesa/main/api_arrayelt.c b/src/mesa/main/api_arrayelt.c
index 385bef1c53e..6400c8f59d7 100644
--- a/src/mesa/main/api_arrayelt.c
+++ b/src/mesa/main/api_arrayelt.c
@@ -1603,7 +1603,6 @@ void _ae_map_vbos( struct gl_context *ctx )
 
    for (i = 0; i < actx->nr_vbos; i++)
       ctx->Driver.MapBuffer(ctx,
-			    GL_ARRAY_BUFFER_ARB,
 			    GL_DYNAMIC_DRAW_ARB,
 			    actx->vbo[i]);
 
diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index 08faf9e08b4..507d0ce6883 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -65,8 +65,7 @@ _mesa_max_buffer_index(struct gl_context *ctx, GLuint count, GLenum type,
 
    if (_mesa_is_bufferobj(elementBuf)) {
       /* elements are in a user-defined buffer object.  need to map it */
-      map = ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER,
-                                  GL_READ_ONLY, elementBuf);
+      map = ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, elementBuf);
       /* Actual address is the sum of pointers */
       indices = (const GLvoid *) ADD_POINTERS(map, (const GLubyte *) indices);
    }
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index e52e59eb5c2..fc1ca2a3680 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -447,11 +447,10 @@ _mesa_buffer_get_subdata( struct gl_context *ctx,
  * \sa glMapBufferARB, dd_function_table::MapBuffer
  */
 static void *
-_mesa_buffer_map( struct gl_context *ctx, GLenum target, GLenum access,
+_mesa_buffer_map( struct gl_context *ctx, GLenum access,
 		  struct gl_buffer_object *bufObj )
 {
    (void) ctx;
-   (void) target;
    (void) access;
    /* Just return a direct pointer to the data */
    if (_mesa_bufferobj_mapped(bufObj)) {
@@ -541,10 +540,8 @@ _mesa_copy_buffer_subdata(struct gl_context *ctx,
    assert(!_mesa_bufferobj_mapped(src));
    assert(!_mesa_bufferobj_mapped(dst));
 
-   srcPtr = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_COPY_READ_BUFFER,
-                                              GL_READ_ONLY, src);
-   dstPtr = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_COPY_WRITE_BUFFER,
-                                              GL_WRITE_ONLY, dst);
+   srcPtr = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, src);
+   dstPtr = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_WRITE_ONLY, dst);
 
    if (srcPtr && dstPtr)
       memcpy(dstPtr + writeOffset, srcPtr + readOffset, size);
@@ -1042,7 +1039,7 @@ _mesa_MapBufferARB(GLenum target, GLenum access)
    }
 
    ASSERT(ctx->Driver.MapBuffer);
-   map = ctx->Driver.MapBuffer( ctx, target, access, bufObj );
+   map = ctx->Driver.MapBuffer( ctx, access, bufObj );
    if (!map) {
       _mesa_error(ctx, GL_OUT_OF_MEMORY, "glMapBufferARB(map failed)");
       return NULL;
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 318ea1f25aa..cfccdb0d828 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -706,7 +706,7 @@ struct dd_function_table {
 			     GLintptrARB offset, GLsizeiptrARB size,
 			     GLvoid *data, struct gl_buffer_object *obj );
 
-   void * (*MapBuffer)( struct gl_context *ctx, GLenum target, GLenum access,
+   void * (*MapBuffer)( struct gl_context *ctx, GLenum access,
 			struct gl_buffer_object *obj );
 
    void (*CopyBufferSubData)( struct gl_context *ctx,
diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index 3e54af25d00..a135810ed27 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -894,8 +894,7 @@ unpack_image(struct gl_context *ctx, GLuint dimensions,
       GLvoid *image;
 
       map = (GLubyte *)
-         ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                               GL_READ_ONLY_ARB, unpack->BufferObj);
+         ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, unpack->BufferObj);
       if (!map) {
          /* unable to map src buffer! */
          _mesa_error(ctx, GL_INVALID_OPERATION, "unable to map PBO");
diff --git a/src/mesa/main/pbo.c b/src/mesa/main/pbo.c
index f93cdf1e392..ce362b9e444 100644
--- a/src/mesa/main/pbo.c
+++ b/src/mesa/main/pbo.c
@@ -128,7 +128,7 @@ _mesa_map_pbo_source(struct gl_context *ctx,
 
    if (_mesa_is_bufferobj(unpack->BufferObj)) {
       /* unpack from PBO */
-      buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
+      buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
                                               GL_READ_ONLY_ARB,
                                               unpack->BufferObj);
       if (!buf)
@@ -223,7 +223,7 @@ _mesa_map_pbo_dest(struct gl_context *ctx,
 
    if (_mesa_is_bufferobj(pack->BufferObj)) {
       /* pack into PBO */
-      buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
+      buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
                                               GL_WRITE_ONLY_ARB,
                                               pack->BufferObj);
       if (!buf)
@@ -326,8 +326,8 @@ _mesa_validate_pbo_teximage(struct gl_context *ctx, GLuint dimensions,
       return NULL;
    }
 
-   buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                                          GL_READ_ONLY_ARB, unpack->BufferObj);
+   buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB,
+					   unpack->BufferObj);
    if (!buf) {
       _mesa_error(ctx, GL_INVALID_OPERATION, funcName, "(PBO is mapped)");
       return NULL;
@@ -363,8 +363,7 @@ _mesa_validate_pbo_compressed_teximage(struct gl_context *ctx,
       return NULL;
    }
 
-   buf = (GLubyte*) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
-                                         GL_READ_ONLY_ARB, packing->BufferObj);
+   buf = (GLubyte*) ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, packing->BufferObj);
    if (!buf) {
       _mesa_error(ctx, GL_INVALID_OPERATION, funcName, "(PBO is mapped");
       return NULL;
diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index 20595ef3b56..a54da7160c7 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -441,8 +441,7 @@ _mesa_get_teximage(struct gl_context *ctx, GLenum target, GLint level,
        * texture data to the PBO if the PBO is in VRAM along with the texture.
        */
       GLubyte *buf = (GLubyte *)
-         ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
-                               GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj);
+         ctx->Driver.MapBuffer(ctx, GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj);
       if (!buf) {
          /* out of memory or other unexpected error */
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage(map PBO failed)");
@@ -499,8 +498,7 @@ _mesa_get_compressed_teximage(struct gl_context *ctx, GLenum target, GLint level
    if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
       /* pack texture image into a PBO */
       GLubyte *buf = (GLubyte *)
-         ctx->Driver.MapBuffer(ctx, GL_PIXEL_PACK_BUFFER_EXT,
-                               GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj);
+         ctx->Driver.MapBuffer(ctx, GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj);
       if (!buf) {
          /* out of memory or other unexpected error */
          _mesa_error(ctx, GL_OUT_OF_MEMORY,
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c
index 732bbaabd4a..a1df11806eb 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -241,7 +241,7 @@ static long st_bufferobj_zero_length = 0;
  * Called via glMapBufferARB().
  */
 static void *
-st_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access,
+st_bufferobj_map(struct gl_context *ctx, GLenum access,
                  struct gl_buffer_object *obj)
 {
    struct st_buffer_object *st_obj = st_buffer_object(obj);
diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c
index 19d4f203c35..7351f6f3be6 100644
--- a/src/mesa/tnl/t_draw.c
+++ b/src/mesa/tnl/t_draw.c
@@ -281,7 +281,6 @@ static void bind_inputs( struct gl_context *ctx,
 	    bo[*nr_bo] = inputs[i]->BufferObj;
 	    (*nr_bo)++;
 	    ctx->Driver.MapBuffer(ctx, 
-				  GL_ARRAY_BUFFER,
 				  GL_READ_ONLY_ARB,
 				  inputs[i]->BufferObj);
 	    
@@ -351,7 +350,6 @@ static void bind_indices( struct gl_context *ctx,
       bo[*nr_bo] = ib->obj;
       (*nr_bo)++;
       ctx->Driver.MapBuffer(ctx, 
-			    GL_ELEMENT_ARRAY_BUFFER,
 			    GL_READ_ONLY_ARB,
 			    ib->obj);
 
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 2692b7539a4..8359a7f1529 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -96,8 +96,7 @@ vbo_get_minmax_index(struct gl_context *ctx,
 
    if (_mesa_is_bufferobj(ib->obj)) {
       const GLvoid *map =
-         ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB,
-                               GL_READ_ONLY, ib->obj);
+         ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, ib->obj);
       indices = ADD_POINTERS(map, ib->ptr);
    } else {
       indices = ib->ptr;
@@ -196,8 +195,7 @@ check_array_data(struct gl_context *ctx, struct gl_client_array *array,
          if (!array->BufferObj->Pointer) {
             /* need to map now */
             array->BufferObj->Pointer =
-               ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB,
-                                     GL_READ_ONLY, array->BufferObj);
+               ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, array->BufferObj);
          }
          data = ADD_POINTERS(data, array->BufferObj->Pointer);
       }
@@ -257,7 +255,6 @@ check_draw_elements_data(struct gl_context *ctx, GLsizei count, GLenum elemType,
 
    if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) {
       elemMap = ctx->Driver.MapBuffer(ctx,
-                                      GL_ELEMENT_ARRAY_BUFFER_ARB,
                                       GL_READ_ONLY,
                                       ctx->Array.ElementArrayBufferObj);
       elements = ADD_POINTERS(elements, elemMap);
@@ -350,8 +347,7 @@ print_draw_arrays(struct gl_context *ctx,
 	     bufName);
 
       if (bufName) {
-         GLubyte *p = ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB,
-                                            GL_READ_ONLY_ARB, bufObj);
+         GLubyte *p = ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, bufObj);
          int offset = (int) (GLintptr) exec->array.inputs[i]->Ptr;
          float *f = (float *) (p + offset);
          int *k = (int *) f;
@@ -715,7 +711,6 @@ static void
 dump_element_buffer(struct gl_context *ctx, GLenum type)
 {
    const GLvoid *map = ctx->Driver.MapBuffer(ctx,
-                                             GL_ELEMENT_ARRAY_BUFFER_ARB,
                                              GL_READ_ONLY,
                                              ctx->Array.ElementArrayBufferObj);
    switch (type) {
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 5366b989df2..07c5c969453 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -342,7 +342,7 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
                                                   exec->vtx.bufferobj);
       if (!exec->vtx.buffer_map)
          exec->vtx.buffer_map =
-            (GLfloat *)ctx->Driver.MapBuffer(ctx, target, access, exec->vtx.bufferobj);
+            (GLfloat *)ctx->Driver.MapBuffer(ctx, access, exec->vtx.bufferobj);
       assert(exec->vtx.buffer_map);
       exec->vtx.buffer_ptr = exec->vtx.buffer_map;
    }
diff --git a/src/mesa/vbo/vbo_rebase.c b/src/mesa/vbo/vbo_rebase.c
index 09ba0a6e9ac..e10908d5ece 100644
--- a/src/mesa/vbo/vbo_rebase.c
+++ b/src/mesa/vbo/vbo_rebase.c
@@ -159,10 +159,7 @@ void vbo_rebase_prims( struct gl_context *ctx,
       void *ptr;
 
       if (map_ib) 
-	 ctx->Driver.MapBuffer(ctx, 
-			       GL_ELEMENT_ARRAY_BUFFER,
-			       GL_READ_ONLY_ARB,
-			       ib->obj);
+	 ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, ib->obj);
 
 
       ptr = ADD_POINTERS(ib->obj->Pointer, ib->ptr);
diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
index 00f34ad1ec0..f90f00c5aae 100644
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -233,7 +233,6 @@ map_vertex_store(struct gl_context *ctx,
    assert(!vertex_store->buffer);
    vertex_store->buffer =
       (GLfloat *) ctx->Driver.MapBuffer(ctx,
-                                        GL_ARRAY_BUFFER_ARB,   /* not used */
                                         GL_WRITE_ONLY,      /* not used */
                                         vertex_store->
                                         bufferobj);
diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
index 0d505555789..52952a57ec8 100644
--- a/src/mesa/vbo/vbo_save_draw.c
+++ b/src/mesa/vbo/vbo_save_draw.c
@@ -218,7 +218,6 @@ vbo_save_loopback_vertex_list(struct gl_context *ctx,
                               const struct vbo_save_vertex_list *list)
 {
    const char *buffer = ctx->Driver.MapBuffer(ctx, 
-					      GL_ARRAY_BUFFER_ARB, 
 					      GL_READ_ONLY, /* ? */
                                               list->vertex_store->bufferobj);
 
diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c
index 49c7435ab5e..8dc5aa0ed76 100644
--- a/src/mesa/vbo/vbo_split_copy.c
+++ b/src/mesa/vbo/vbo_split_copy.c
@@ -444,7 +444,7 @@ replay_init( struct copy_context *copy )
 	 copy->vertex_size += attr_size(copy->array[i]);
       
 	 if (_mesa_is_bufferobj(vbo) && !_mesa_bufferobj_mapped(vbo)) 
-	    ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY, vbo);
+	    ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, vbo);
 
 	 copy->varying[j].src_ptr = ADD_POINTERS(vbo->Pointer,
 						 copy->array[i]->Ptr);
@@ -459,8 +459,7 @@ replay_init( struct copy_context *copy )
     */
    if (_mesa_is_bufferobj(copy->ib->obj) &&
        !_mesa_bufferobj_mapped(copy->ib->obj)) 
-      ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY,
-			    copy->ib->obj);
+      ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, copy->ib->obj);
 
    srcptr = (const GLubyte *) ADD_POINTERS(copy->ib->obj->Pointer,
                                            copy->ib->ptr);

From 92f3fca0ea429dcf07123e63447449db53308266 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 21 Aug 2011 17:23:58 -0700
Subject: [PATCH 529/600] mesa: Remove target parameter from
 dd_function_table::BufferSubData

No driver used that parameter, and most drivers ended up with a bunch
of unused-parameter warnings because it was there.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/intel/intel_buffer_objects.c   | 1 -
 src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c    | 2 +-
 src/mesa/drivers/dri/radeon/radeon_buffer_objects.c | 1 -
 src/mesa/main/bufferobj.c                           | 6 +++---
 src/mesa/main/dd.h                                  | 2 +-
 src/mesa/state_tracker/st_cb_bufferobjects.c        | 1 -
 6 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 6f3a90942f6..307b5542798 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -202,7 +202,6 @@ intel_bufferobj_data(struct gl_context * ctx,
  */
 static void
 intel_bufferobj_subdata(struct gl_context * ctx,
-                        GLenum target,
                         GLintptrARB offset,
                         GLsizeiptrARB size,
                         const GLvoid * data, struct gl_buffer_object *obj)
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
index 9db39491515..6b10d28c3cf 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
@@ -107,7 +107,7 @@ nouveau_bufferobj_data(struct gl_context *ctx, GLenum target, GLsizeiptrARB size
 }
 
 static void
-nouveau_bufferobj_subdata(struct gl_context *ctx, GLenum target, GLintptrARB offset,
+nouveau_bufferobj_subdata(struct gl_context *ctx, GLintptrARB offset,
 			  GLsizeiptrARB size, const GLvoid *data,
 			  struct gl_buffer_object *obj)
 {
diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
index e645723299f..319890c48ac 100644
--- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
+++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
@@ -130,7 +130,6 @@ radeonBufferData(struct gl_context * ctx,
  */
 static void
 radeonBufferSubData(struct gl_context * ctx,
-                    GLenum target,
                     GLintptrARB offset,
                     GLsizeiptrARB size,
                     const GLvoid * data,
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index fc1ca2a3680..b0721ea600b 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -386,11 +386,11 @@ _mesa_buffer_data( struct gl_context *ctx, GLenum target, GLsizeiptrARB size,
  * \sa glBufferSubDataARB, dd_function_table::BufferSubData.
  */
 static void
-_mesa_buffer_subdata( struct gl_context *ctx, GLenum target, GLintptrARB offset,
+_mesa_buffer_subdata( struct gl_context *ctx, GLintptrARB offset,
 		      GLsizeiptrARB size, const GLvoid * data,
 		      struct gl_buffer_object * bufObj )
 {
-   (void) ctx; (void) target;
+   (void) ctx;
 
    /* this should have been caught in _mesa_BufferSubData() */
    ASSERT(size + offset <= bufObj->Size);
@@ -975,7 +975,7 @@ _mesa_BufferSubDataARB(GLenum target, GLintptrARB offset,
    bufObj->Written = GL_TRUE;
 
    ASSERT(ctx->Driver.BufferSubData);
-   ctx->Driver.BufferSubData( ctx, target, offset, size, data, bufObj );
+   ctx->Driver.BufferSubData( ctx, offset, size, data, bufObj );
 }
 
 
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index cfccdb0d828..54fcb88f69a 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -698,7 +698,7 @@ struct dd_function_table {
                             const GLvoid *data, GLenum usage,
                             struct gl_buffer_object *obj );
 
-   void (*BufferSubData)( struct gl_context *ctx, GLenum target, GLintptrARB offset,
+   void (*BufferSubData)( struct gl_context *ctx, GLintptrARB offset,
 			  GLsizeiptrARB size, const GLvoid *data,
 			  struct gl_buffer_object *obj );
 
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c
index a1df11806eb..d71b38bbddf 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -93,7 +93,6 @@ st_bufferobj_free(struct gl_context *ctx, struct gl_buffer_object *obj)
  */
 static void
 st_bufferobj_subdata(struct gl_context *ctx,
-		     GLenum target,
 		     GLintptrARB offset,
 		     GLsizeiptrARB size,
 		     const GLvoid * data, struct gl_buffer_object *obj)

From 6c8aa3491a19535e8c39a47a3766bf8524e80582 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 21 Aug 2011 17:30:35 -0700
Subject: [PATCH 530/600] mesa: Remove target parameter from
 dd_function_table::GetBufferSubData

No driver used that parameter, and most drivers ended up with a bunch
of unused-parameter warnings because it was there.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/intel/intel_buffer_objects.c   | 1 -
 src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c    | 2 +-
 src/mesa/drivers/dri/radeon/radeon_buffer_objects.c | 1 -
 src/mesa/main/bufferobj.c                           | 7 +++----
 src/mesa/main/dd.h                                  | 2 +-
 src/mesa/state_tracker/st_cb_bufferobjects.c        | 1 -
 src/mesa/vbo/vbo_save_draw.c                        | 2 +-
 7 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 307b5542798..d9c70dec85b 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -274,7 +274,6 @@ intel_bufferobj_subdata(struct gl_context * ctx,
  */
 static void
 intel_bufferobj_get_subdata(struct gl_context * ctx,
-                            GLenum target,
                             GLintptrARB offset,
                             GLsizeiptrARB size,
                             GLvoid * data, struct gl_buffer_object *obj)
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
index 6b10d28c3cf..87a2bfee093 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
@@ -115,7 +115,7 @@ nouveau_bufferobj_subdata(struct gl_context *ctx, GLintptrARB offset,
 }
 
 static void
-nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLenum target, GLintptrARB offset,
+nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLintptrARB offset,
 			   GLsizeiptrARB size, GLvoid *data,
 			   struct gl_buffer_object *obj)
 {
diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
index 319890c48ac..ee634363dca 100644
--- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
+++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
@@ -154,7 +154,6 @@ radeonBufferSubData(struct gl_context * ctx,
  */
 static void
 radeonGetBufferSubData(struct gl_context * ctx,
-                       GLenum target,
                        GLintptrARB offset,
                        GLsizeiptrARB size,
                        GLvoid * data,
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index b0721ea600b..41e83b0d689 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -419,12 +419,11 @@ _mesa_buffer_subdata( struct gl_context *ctx, GLintptrARB offset,
  * \sa glBufferGetSubDataARB, dd_function_table::GetBufferSubData.
  */
 static void
-_mesa_buffer_get_subdata( struct gl_context *ctx,
-                          GLenum target, GLintptrARB offset,
+_mesa_buffer_get_subdata( struct gl_context *ctx, GLintptrARB offset,
 			  GLsizeiptrARB size, GLvoid * data,
 			  struct gl_buffer_object * bufObj )
 {
-   (void) ctx; (void) target;
+   (void) ctx;
 
    if (bufObj->Data && ((GLsizeiptrARB) (size + offset) <= bufObj->Size)) {
       memcpy( data, (GLubyte *) bufObj->Data + offset, size );
@@ -995,7 +994,7 @@ _mesa_GetBufferSubDataARB(GLenum target, GLintptrARB offset,
    }
 
    ASSERT(ctx->Driver.GetBufferSubData);
-   ctx->Driver.GetBufferSubData( ctx, target, offset, size, data, bufObj );
+   ctx->Driver.GetBufferSubData( ctx, offset, size, data, bufObj );
 }
 
 
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 54fcb88f69a..5560d96931a 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -702,7 +702,7 @@ struct dd_function_table {
 			  GLsizeiptrARB size, const GLvoid *data,
 			  struct gl_buffer_object *obj );
 
-   void (*GetBufferSubData)( struct gl_context *ctx, GLenum target,
+   void (*GetBufferSubData)( struct gl_context *ctx,
 			     GLintptrARB offset, GLsizeiptrARB size,
 			     GLvoid *data, struct gl_buffer_object *obj );
 
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c
index d71b38bbddf..2de56bdb54b 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -132,7 +132,6 @@ st_bufferobj_subdata(struct gl_context *ctx,
  */
 static void
 st_bufferobj_get_subdata(struct gl_context *ctx,
-                         GLenum target,
                          GLintptrARB offset,
                          GLsizeiptrARB size,
                          GLvoid * data, struct gl_buffer_object *obj)
diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
index 52952a57ec8..e7996f29307 100644
--- a/src/mesa/vbo/vbo_save_draw.c
+++ b/src/mesa/vbo/vbo_save_draw.c
@@ -70,7 +70,7 @@ _playback_copy_to_current(struct gl_context *ctx,
       else
          offset = node->buffer_offset;
 
-      ctx->Driver.GetBufferSubData( ctx, 0, offset, 
+      ctx->Driver.GetBufferSubData( ctx, offset,
                                     node->vertex_size * sizeof(GLfloat), 
                                     data, node->vertex_store->bufferobj );
 

From 4ddae2fb666c86e3267ef6e3d2699f9bfb40d206 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 21 Aug 2011 17:37:56 -0700
Subject: [PATCH 531/600] mesa: Remove target parameter from
 dd_function_table::MapBufferRange

No driver used that parameter, and most drivers ended up with a bunch
of unused-parameter warnings because it was there.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/intel/intel_buffer_objects.c | 2 +-
 src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c  | 3 +--
 src/mesa/main/bufferobj.c                         | 6 ++----
 src/mesa/main/dd.h                                | 2 +-
 src/mesa/state_tracker/st_cb_bufferobjects.c      | 2 +-
 src/mesa/vbo/vbo_exec_draw.c                      | 3 +--
 6 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index d9c70dec85b..7a0102b7226 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -369,7 +369,7 @@ intel_bufferobj_map(struct gl_context * ctx,
  */
 static void *
 intel_bufferobj_map_range(struct gl_context * ctx,
-			  GLenum target, GLintptr offset, GLsizeiptr length,
+			  GLintptr offset, GLsizeiptr length,
 			  GLbitfield access, struct gl_buffer_object *obj)
 {
    struct intel_context *intel = intel_context(ctx);
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
index 87a2bfee093..cf892a893f8 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
@@ -135,8 +135,7 @@ nouveau_bufferobj_map(struct gl_context *ctx, GLenum access,
 	    access == GL_READ_WRITE_ARB)
 		flags |= GL_MAP_WRITE_BIT;
 
-	return ctx->Driver.MapBufferRange(ctx, 0, 0, obj->Size, flags,
-					  obj);
+	return ctx->Driver.MapBufferRange(ctx, 0, obj->Size, flags, obj);
 }
 
 static void *
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 41e83b0d689..ba3811d315b 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -468,12 +468,11 @@ _mesa_buffer_map( struct gl_context *ctx, GLenum access,
  * Called via glMapBufferRange().
  */
 static void *
-_mesa_buffer_map_range( struct gl_context *ctx, GLenum target, GLintptr offset,
+_mesa_buffer_map_range( struct gl_context *ctx, GLintptr offset,
                         GLsizeiptr length, GLbitfield access,
                         struct gl_buffer_object *bufObj )
 {
    (void) ctx;
-   (void) target;
    assert(!_mesa_bufferobj_mapped(bufObj));
    /* Just return a direct pointer to the data */
    bufObj->Pointer = bufObj->Data + offset;
@@ -1445,8 +1444,7 @@ _mesa_MapBufferRange(GLenum target, GLintptr offset, GLsizeiptr length,
    }
       
    ASSERT(ctx->Driver.MapBufferRange);
-   map = ctx->Driver.MapBufferRange(ctx, target, offset, length,
-                                    access, bufObj);
+   map = ctx->Driver.MapBufferRange(ctx, offset, length, access, bufObj);
    if (!map) {
       _mesa_error(ctx, GL_OUT_OF_MEMORY, "glMapBufferARB(map failed)");
    }
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 5560d96931a..9876d5a53e0 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -717,7 +717,7 @@ struct dd_function_table {
 
    /* May return NULL if MESA_MAP_NOWAIT_BIT is set in access:
     */
-   void * (*MapBufferRange)( struct gl_context *ctx, GLenum target, GLintptr offset,
+   void * (*MapBufferRange)( struct gl_context *ctx, GLintptr offset,
                              GLsizeiptr length, GLbitfield access,
                              struct gl_buffer_object *obj);
 
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c
index 2de56bdb54b..6857c00b08d 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -281,7 +281,7 @@ st_bufferobj_map(struct gl_context *ctx, GLenum access,
  * Called via glMapBufferRange().
  */
 static void *
-st_bufferobj_map_range(struct gl_context *ctx, GLenum target, 
+st_bufferobj_map_range(struct gl_context *ctx,
                        GLintptr offset, GLsizeiptr length, GLbitfield access,
                        struct gl_buffer_object *obj)
 {
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 07c5c969453..2c8340ca5ca 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -316,7 +316,6 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
       /* The VBO exists and there's room for more */
       exec->vtx.buffer_map = 
          (GLfloat *)ctx->Driver.MapBufferRange(ctx, 
-                                               target, 
                                                exec->vtx.buffer_used,
                                                (VBO_VERT_BUFFER_SIZE - 
                                                 exec->vtx.buffer_used),
@@ -336,7 +335,7 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
 
       if (ctx->Driver.MapBufferRange)
          exec->vtx.buffer_map = 
-            (GLfloat *)ctx->Driver.MapBufferRange(ctx, target,
+            (GLfloat *)ctx->Driver.MapBufferRange(ctx,
                                                   0, VBO_VERT_BUFFER_SIZE,
                                                   accessRange,
                                                   exec->vtx.bufferobj);

From f973be59fa293ea75f05cdbac2372360deb5e186 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 21 Aug 2011 17:55:33 -0700
Subject: [PATCH 532/600] intel: Correctly check for read-only mappings in
 intel_bufferobj_map_range

The old code was an obvious cut-and-paste fail from intel_bufferobj_map.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
---
 src/mesa/drivers/dri/intel/intel_buffer_objects.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 7a0102b7226..16a56b03a2e 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -374,7 +374,6 @@ intel_bufferobj_map_range(struct gl_context * ctx,
 {
    struct intel_context *intel = intel_context(ctx);
    struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
-   GLboolean read_only = (access == GL_READ_ONLY_ARB);
 
    assert(intel_obj);
 
@@ -386,6 +385,9 @@ intel_bufferobj_map_range(struct gl_context * ctx,
    obj->AccessFlags = access;
 
    if (intel_obj->sys_buffer) {
+      const bool read_only =
+	 (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_READ_BIT;
+
       if (!read_only && intel_obj->source)
 	 release_buffer(intel_obj);
 

From 6183edc070e2d3dce36ab5ee7aee72b0c38775a7 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 21 Aug 2011 17:56:39 -0700
Subject: [PATCH 533/600] mesa: Remove target parameter from
 dd_function_table::FlushMappedBufferRange

No driver used that parameter, and most drivers ended up with a bunch
of unused-parameter warnings because it was there.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/intel/intel_buffer_objects.c | 2 +-
 src/mesa/main/bufferobj.c                         | 5 ++---
 src/mesa/main/dd.h                                | 2 +-
 src/mesa/state_tracker/st_cb_bufferobjects.c      | 2 +-
 src/mesa/vbo/vbo_exec_draw.c                      | 3 +--
 5 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 16a56b03a2e..1a0f5486c4f 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -471,7 +471,7 @@ intel_bufferobj_map_range(struct gl_context * ctx,
  * would defeat the point.
  */
 static void
-intel_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target,
+intel_bufferobj_flush_mapped_range(struct gl_context *ctx,
 				   GLintptr offset, GLsizeiptr length,
 				   struct gl_buffer_object *obj)
 {
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index ba3811d315b..47af8b59587 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -488,12 +488,11 @@ _mesa_buffer_map_range( struct gl_context *ctx, GLintptr offset,
  * Called via glFlushMappedBufferRange().
  */
 static void
-_mesa_buffer_flush_mapped_range( struct gl_context *ctx, GLenum target, 
+_mesa_buffer_flush_mapped_range( struct gl_context *ctx,
                                  GLintptr offset, GLsizeiptr length,
                                  struct gl_buffer_object *obj )
 {
    (void) ctx;
-   (void) target;
    (void) offset;
    (void) length;
    (void) obj;
@@ -1527,7 +1526,7 @@ _mesa_FlushMappedBufferRange(GLenum target, GLintptr offset, GLsizeiptr length)
    ASSERT(bufObj->AccessFlags & GL_MAP_WRITE_BIT);
 
    if (ctx->Driver.FlushMappedBufferRange)
-      ctx->Driver.FlushMappedBufferRange(ctx, target, offset, length, bufObj);
+      ctx->Driver.FlushMappedBufferRange(ctx, offset, length, bufObj);
 }
 
 
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 9876d5a53e0..8dfea937f37 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -721,7 +721,7 @@ struct dd_function_table {
                              GLsizeiptr length, GLbitfield access,
                              struct gl_buffer_object *obj);
 
-   void (*FlushMappedBufferRange)(struct gl_context *ctx, GLenum target, 
+   void (*FlushMappedBufferRange)(struct gl_context *ctx,
                                   GLintptr offset, GLsizeiptr length,
                                   struct gl_buffer_object *obj);
 
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c
index 6857c00b08d..dd2eca7bc96 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -351,7 +351,7 @@ st_bufferobj_map_range(struct gl_context *ctx,
 
 
 static void
-st_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target, 
+st_bufferobj_flush_mapped_range(struct gl_context *ctx,
                                 GLintptr offset, GLsizeiptr length,
                                 struct gl_buffer_object *obj)
 {
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 2c8340ca5ca..2dc60661796 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -270,8 +270,7 @@ vbo_exec_vtx_unmap( struct vbo_exec_context *exec )
          GLsizeiptr length = (exec->vtx.buffer_ptr - exec->vtx.buffer_map) * sizeof(float);
 
          if (length)
-            ctx->Driver.FlushMappedBufferRange(ctx, target,
-                                               offset, length,
+            ctx->Driver.FlushMappedBufferRange(ctx, offset, length,
                                                exec->vtx.bufferobj);
       }
 

From b2184da684fc20849b5e6e554f0a0f92d2872ce9 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 21 Aug 2011 18:45:06 -0700
Subject: [PATCH 534/600] mesa: Fix incorrect access parameter passed to
 MapBuffer

The code previously passed GL_DYNAMIC_DRAW for the access parameter.
By inspection, I believe that all drivers would treat this as
GL_READ_WRITE because it's not GL_READ_ONLY and it's not
GL_WRITE_ONLY.

It appears the i965 code wants GL_WRITE_ONLY (it's about to write a
bunch of data in, never read data), while the arrayelt code is
GL_READ_ONLY (just dereffed as arguments to CALL_Whatever*v).

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Keith Whitwell <keithw@vmware.com>
---
 src/mesa/drivers/dri/i965/brw_draw_upload.c | 4 +---
 src/mesa/main/api_arrayelt.c                | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 66c42aa0779..9acec45799a 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -689,9 +689,7 @@ static void brw_prepare_indices(struct brw_context *brw)
        * rebase it into a temporary.
        */
        if ((get_size(index_buffer->type) - 1) & offset) {
-           GLubyte *map = ctx->Driver.MapBuffer(ctx,
-                                                GL_DYNAMIC_DRAW_ARB,
-                                                bufferobj);
+           GLubyte *map = ctx->Driver.MapBuffer(ctx, GL_WRITE_ONLY, bufferobj);
            map += offset;
 
 	   intel_upload_data(&brw->intel, map, ib_size, ib_type_size,
diff --git a/src/mesa/main/api_arrayelt.c b/src/mesa/main/api_arrayelt.c
index 6400c8f59d7..8e1e3ff8dd5 100644
--- a/src/mesa/main/api_arrayelt.c
+++ b/src/mesa/main/api_arrayelt.c
@@ -1602,9 +1602,7 @@ void _ae_map_vbos( struct gl_context *ctx )
       _ae_update_state(ctx);
 
    for (i = 0; i < actx->nr_vbos; i++)
-      ctx->Driver.MapBuffer(ctx,
-			    GL_DYNAMIC_DRAW_ARB,
-			    actx->vbo[i]);
+      ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, actx->vbo[i]);
 
    if (actx->nr_vbos)
       actx->mapped_vbos = GL_TRUE;

From cccc7412c22a704d85203d7bb9c8e73d45cccf49 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 21 Aug 2011 18:32:09 -0700
Subject: [PATCH 535/600] radeon: Hack up an implementation of MapBufferRange

This doesn't implement any of the "cool" features of MapBufferRange.
Adding this function is necessary for the next commit in the series.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Alex Deucher <alexdeucher@gmail.com>
Cc: Maciej Cencora <m.cencora@gmail.com>
---
 .../dri/radeon/radeon_buffer_objects.c        | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
index ee634363dca..e106d98c38e 100644
--- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
+++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
@@ -197,6 +197,38 @@ radeonMapBuffer(struct gl_context * ctx,
 }
 
 
+/**
+ * Called via glMapBufferRange()
+ */
+static void *
+radeonMapBufferRange(struct gl_context * ctx,
+		     GLintptr offset, GLsizeiptr length,
+		     GLbitfield access, struct gl_buffer_object *obj)
+{
+    struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
+    const GLboolean write_only =
+       (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_WRITE_BIT;
+
+    if (write_only) {
+        ctx->Driver.Flush(ctx);
+    }
+
+    if (radeon_obj->bo == NULL) {
+        obj->Pointer = NULL;
+        return NULL;
+    }
+
+    obj->Offset = offset;
+    obj->Length = length;
+    obj->AccessFlags = access;
+
+    radeon_bo_map(radeon_obj->bo, write_only);
+
+    obj->Pointer = radeon_obj->bo->ptr + offset;
+    return obj->Pointer;
+}
+
+
 /**
  * Called via glUnmapBufferARB()
  */
@@ -226,5 +258,6 @@ radeonInitBufferObjectFuncs(struct dd_function_table *functions)
     functions->BufferSubData = radeonBufferSubData;
     functions->GetBufferSubData = radeonGetBufferSubData;
     functions->MapBuffer = radeonMapBuffer;
+    functions->MapBufferRange = radeonMapBufferRange;
     functions->UnmapBuffer = radeonUnmapBuffer;
 }

From 28249bd260f4c52badf3eb61ade2744604b21bca Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Sun, 21 Aug 2011 18:34:27 -0700
Subject: [PATCH 536/600] mesa: Eliminate dd_function_table::MapBuffer

Replace all calls to dd_function_table::MapBuffer with appropriate
calls to dd_function_table::MapBufferRange, then remove all the cruft.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i965/brw_draw_upload.c   |  6 +-
 .../drivers/dri/intel/intel_buffer_objects.c  | 69 ++-----------------
 .../drivers/dri/intel/intel_pixel_bitmap.c    |  6 +-
 .../drivers/dri/nouveau/nouveau_bufferobj.c   | 17 -----
 src/mesa/drivers/dri/r300/r300_draw.c         | 14 ++--
 src/mesa/drivers/dri/r600/evergreen_render.c  | 12 ++--
 src/mesa/drivers/dri/r600/r700_render.c       | 12 ++--
 .../dri/radeon/radeon_buffer_objects.c        | 32 +--------
 src/mesa/drivers/x11/xm_dd.c                  | 15 ++--
 src/mesa/main/api_arrayelt.c                  |  5 +-
 src/mesa/main/api_validate.c                  |  3 +-
 src/mesa/main/bufferobj.c                     | 43 ++----------
 src/mesa/main/dd.h                            |  3 -
 src/mesa/main/dlist.c                         |  3 +-
 src/mesa/main/pbo.c                           | 24 ++++---
 src/mesa/main/texgetimage.c                   |  6 +-
 src/mesa/state_tracker/st_cb_bufferobjects.c  | 43 ------------
 src/mesa/tnl/t_draw.c                         | 11 ++-
 src/mesa/vbo/vbo_exec_array.c                 | 24 ++++---
 src/mesa/vbo/vbo_exec_draw.c                  | 18 ++---
 src/mesa/vbo/vbo_rebase.c                     |  3 +-
 src/mesa/vbo/vbo_save_api.c                   |  8 +--
 src/mesa/vbo/vbo_save_draw.c                  |  8 ++-
 src/mesa/vbo/vbo_split_copy.c                 |  5 +-
 24 files changed, 123 insertions(+), 267 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 9acec45799a..ed1964f5a6f 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -689,7 +689,11 @@ static void brw_prepare_indices(struct brw_context *brw)
        * rebase it into a temporary.
        */
        if ((get_size(index_buffer->type) - 1) & offset) {
-           GLubyte *map = ctx->Driver.MapBuffer(ctx, GL_WRITE_ONLY, bufferobj);
+           GLubyte *map = ctx->Driver.MapBufferRange(ctx,
+						     0,
+						     bufferobj->Size,
+						     GL_MAP_WRITE_BIT,
+						     bufferobj);
            map += offset;
 
 	   intel_upload_data(&brw->intel, map, ib_size, ib_type_size,
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 1a0f5486c4f..d908975fc87 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -295,64 +295,7 @@ intel_bufferobj_get_subdata(struct gl_context * ctx,
 
 
 /**
- * Called via glMapBufferARB().
- */
-static void *
-intel_bufferobj_map(struct gl_context * ctx,
-                    GLenum access, struct gl_buffer_object *obj)
-{
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
-   GLboolean read_only = (access == GL_READ_ONLY_ARB);
-   GLboolean write_only = (access == GL_WRITE_ONLY_ARB);
-
-   assert(intel_obj);
-
-   if (intel_obj->sys_buffer) {
-      if (!read_only && intel_obj->source) {
-	 release_buffer(intel_obj);
-      }
-
-      if (!intel_obj->buffer || intel_obj->source) {
-	 obj->Pointer = intel_obj->sys_buffer;
-	 obj->Length = obj->Size;
-	 obj->Offset = 0;
-	 return obj->Pointer;
-      }
-
-      free(intel_obj->sys_buffer);
-      intel_obj->sys_buffer = NULL;
-   }
-
-   /* Flush any existing batchbuffer that might reference this data. */
-   if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer))
-      intel_flush(ctx);
-
-   if (intel_obj->region)
-      intel_bufferobj_cow(intel, intel_obj);
-
-   if (intel_obj->buffer == NULL) {
-      obj->Pointer = NULL;
-      return NULL;
-   }
-
-   if (write_only) {
-      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
-      intel_obj->mapped_gtt = GL_TRUE;
-   } else {
-      drm_intel_bo_map(intel_obj->buffer, !read_only);
-      intel_obj->mapped_gtt = GL_FALSE;
-   }
-
-   obj->Pointer = intel_obj->buffer->virtual;
-   obj->Length = obj->Size;
-   obj->Offset = 0;
-
-   return obj->Pointer;
-}
-
-/**
- * Called via glMapBufferRange().
+ * Called via glMapBufferRange and glMapBuffer
  *
  * The goal of this extension is to allow apps to accumulate their rendering
  * at the same time as they accumulate their buffer object.  Without it,
@@ -760,15 +703,18 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx,
        * not overlap.
        */
       if (src == dst) {
-	 char *ptr = intel_bufferobj_map(ctx, GL_READ_WRITE, dst);
+	 char *ptr = intel_bufferobj_map_range(ctx, 0, dst->Size,
+					       GL_MAP_READ_BIT, dst);
 	 memmove(ptr + write_offset, ptr + read_offset, size);
 	 intel_bufferobj_unmap(ctx, dst);
       } else {
 	 const char *src_ptr;
 	 char *dst_ptr;
 
-	 src_ptr =  intel_bufferobj_map(ctx, GL_READ_ONLY, src);
-	 dst_ptr =  intel_bufferobj_map(ctx, GL_WRITE_ONLY, dst);
+	 src_ptr =  intel_bufferobj_map_range(ctx, 0, src->Size,
+					      GL_MAP_READ_BIT, src);
+	 dst_ptr =  intel_bufferobj_map_range(ctx, 0, dst->Size,
+					      GL_MAP_WRITE_BIT, dst);
 
 	 memcpy(dst_ptr + write_offset, src_ptr + read_offset, size);
 
@@ -923,7 +869,6 @@ intelInitBufferObjectFuncs(struct dd_function_table *functions)
    functions->BufferData = intel_bufferobj_data;
    functions->BufferSubData = intel_bufferobj_subdata;
    functions->GetBufferSubData = intel_bufferobj_get_subdata;
-   functions->MapBuffer = intel_bufferobj_map;
    functions->MapBufferRange = intel_bufferobj_map_range;
    functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range;
    functions->UnmapBuffer = intel_bufferobj_unmap;
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
index 44d79534ffd..d9873a303ee 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -74,9 +74,9 @@ static const GLubyte *map_pbo( struct gl_context *ctx,
       return NULL;
    }
 
-   buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
-					   GL_READ_ONLY_ARB,
-					   unpack->BufferObj);
+   buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size,
+						GL_MAP_READ_BIT,
+						unpack->BufferObj);
    if (!buf) {
       _mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)");
       return NULL;
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
index cf892a893f8..433590c4181 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
@@ -122,22 +122,6 @@ nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLintptrARB offset,
 	memcpy(data, get_bufferobj_map(obj, NOUVEAU_BO_RD) + offset, size);
 }
 
-static void *
-nouveau_bufferobj_map(struct gl_context *ctx, GLenum access,
-		   struct gl_buffer_object *obj)
-{
-	unsigned flags = 0;
-
-	if (access == GL_READ_ONLY_ARB ||
-	    access == GL_READ_WRITE_ARB)
-		flags |= GL_MAP_READ_BIT;
-	if (access == GL_WRITE_ONLY_ARB ||
-	    access == GL_READ_WRITE_ARB)
-		flags |= GL_MAP_WRITE_BIT;
-
-	return ctx->Driver.MapBufferRange(ctx, 0, obj->Size, flags, obj);
-}
-
 static void *
 nouveau_bufferobj_map_range(struct gl_context *ctx, GLenum target, GLintptr offset,
 			    GLsizeiptr length, GLbitfield access,
@@ -188,7 +172,6 @@ nouveau_bufferobj_functions_init(struct dd_function_table *functions)
 	functions->BufferData = nouveau_bufferobj_data;
 	functions->BufferSubData = nouveau_bufferobj_subdata;
 	functions->GetBufferSubData = nouveau_bufferobj_get_subdata;
-	functions->MapBuffer = nouveau_bufferobj_map;
 	functions->MapBufferRange = nouveau_bufferobj_map_range;
 	functions->UnmapBuffer = nouveau_bufferobj_unmap;
 }
diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c
index ba37923736c..5587c16dd44 100644
--- a/src/mesa/drivers/dri/r300/r300_draw.c
+++ b/src/mesa/drivers/dri/r300/r300_draw.c
@@ -84,7 +84,8 @@ static void r300FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 	GLboolean mapped_named_bo = GL_FALSE;
 
 	if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
-		ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+		ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+					   GL_MAP_READ_BIT, mesa_ind_buf->obj);
 		mapped_named_bo = GL_TRUE;
 		assert(mesa_ind_buf->obj->Pointer != NULL);
 	}
@@ -163,7 +164,10 @@ static void r300SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 		GLboolean mapped_named_bo = GL_FALSE;
 
 		if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
-			ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+			ctx->Driver.MapBufferRange(ctx, 0,
+						   mesa_ind_buf->obj->Size,
+						   GL_MAP_READ_BIT,
+						   mesa_ind_buf->obj);
 			assert(mesa_ind_buf->obj->Pointer != NULL);
 			mapped_named_bo = GL_TRUE;
 		}
@@ -235,7 +239,8 @@ static void r300ConvertAttrib(struct gl_context *ctx, int count, const struct gl
 
 	if (input->BufferObj->Name) {
 		if (!input->BufferObj->Pointer) {
-			ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
+			ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+					      GL_MAP_READ_BIT, input->BufferObj);
 			mapped_named_bo = GL_TRUE;
 		}
 
@@ -302,7 +307,8 @@ static void r300AlignDataToDword(struct gl_context *ctx, const struct gl_client_
 	radeon_bo_map(attr->bo, 1);
 
 	if (!input->BufferObj->Pointer) {
-		ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
+		ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+					   GL_MAP_READ_BIT, input->BufferObj);
 		mapped_named_bo = GL_TRUE;
 	}
 
diff --git a/src/mesa/drivers/dri/r600/evergreen_render.c b/src/mesa/drivers/dri/r600/evergreen_render.c
index e82f4d445f5..74563caf47c 100644
--- a/src/mesa/drivers/dri/r600/evergreen_render.c
+++ b/src/mesa/drivers/dri/r600/evergreen_render.c
@@ -403,7 +403,8 @@ static void evergreenConvertAttrib(struct gl_context *ctx, int count,
     {
         if (!input->BufferObj->Pointer) 
         {
-            ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
+	    ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+				       GL_MAP_READ_BIT, input->BufferObj);
             mapped_named_bo = GL_TRUE;
         }
 
@@ -470,7 +471,8 @@ static void evergreenFixupIndexBuffer(struct gl_context *ctx, const struct _mesa
 
     if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
     {
-        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+        ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+				   GL_MAP_READ_BIT, mesa_ind_buf->obj);
         mapped_named_bo = GL_TRUE;
         assert(mesa_ind_buf->obj->Pointer != NULL);
     }
@@ -606,7 +608,8 @@ static void evergreenSetupIndexBuffer(struct gl_context *ctx, const struct _mesa
 
         if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
         {
-	        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+	        ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+					   GL_MAP_READ_BIT, mesa_ind_buf->obj);
 	        assert(mesa_ind_buf->obj->Pointer != NULL);
 	        mapped_named_bo = GL_TRUE;
         }
@@ -655,7 +658,8 @@ static void evergreenAlignDataToDword(struct gl_context *ctx,
 
     if (!input->BufferObj->Pointer) 
     {
-        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
+	ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+				   GL_MAP_READ_BIT, input->BufferObj->obj);
         mapped_named_bo = GL_TRUE;
     }
 
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index 52a6f7cc45e..a565c9f2087 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -490,7 +490,8 @@ static void r700ConvertAttrib(struct gl_context *ctx, int count,
     {
         if (!input->BufferObj->Pointer) 
         {
-            ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
+	   ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+				      GL_MAP_READ_BIT, input->BufferObj);
             mapped_named_bo = GL_TRUE;
         }
 
@@ -564,7 +565,8 @@ static void r700AlignDataToDword(struct gl_context *ctx,
 
     if (!input->BufferObj->Pointer) 
     {
-        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, input->BufferObj);
+        ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size,
+				   GL_MAP_READ_BIT, input->BufferObj);
         mapped_named_bo = GL_TRUE;
     }
 
@@ -727,7 +729,8 @@ static void r700FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 
     if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
     {
-        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+	ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+				   GL_MAP_READ_BIT, mesa_ind_buf->obj);
         mapped_named_bo = GL_TRUE;
         assert(mesa_ind_buf->obj->Pointer != NULL);
     }
@@ -813,7 +816,8 @@ static void r700SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde
 
         if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
         {
-	        ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+		ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size,
+					   GL_MAP_READ_BIT, mesa_ind_buf->obj);
 	        assert(mesa_ind_buf->obj->Pointer != NULL);
 	        mapped_named_bo = GL_TRUE;
         }
diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
index e106d98c38e..7b59c0377f8 100644
--- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
+++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
@@ -169,36 +169,7 @@ radeonGetBufferSubData(struct gl_context * ctx,
 }
 
 /**
- * Called via glMapBufferARB()
- */
-static void *
-radeonMapBuffer(struct gl_context * ctx,
-                GLenum access,
-                struct gl_buffer_object *obj)
-{
-    struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
-
-    if (access == GL_WRITE_ONLY_ARB) {
-        ctx->Driver.Flush(ctx);
-    }
-
-    if (radeon_obj->bo == NULL) {
-        obj->Pointer = NULL;
-        return NULL;
-    }
-
-    radeon_bo_map(radeon_obj->bo, access == GL_WRITE_ONLY_ARB);
-
-    obj->Pointer = radeon_obj->bo->ptr;
-    obj->Length = obj->Size;
-    obj->Offset = 0;
-
-    return obj->Pointer;
-}
-
-
-/**
- * Called via glMapBufferRange()
+ * Called via glMapBuffer() and glMapBufferRange()
  */
 static void *
 radeonMapBufferRange(struct gl_context * ctx,
@@ -257,7 +228,6 @@ radeonInitBufferObjectFuncs(struct dd_function_table *functions)
     functions->BufferData = radeonBufferData;
     functions->BufferSubData = radeonBufferSubData;
     functions->GetBufferSubData = radeonGetBufferSubData;
-    functions->MapBuffer = radeonMapBuffer;
     functions->MapBufferRange = radeonMapBufferRange;
     functions->UnmapBuffer = radeonUnmapBuffer;
 }
diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c
index fe00bdd520d..3a5d0ae04fc 100644
--- a/src/mesa/drivers/x11/xm_dd.c
+++ b/src/mesa/drivers/x11/xm_dd.c
@@ -454,9 +454,10 @@ xmesa_DrawPixels_8R8G8B( struct gl_context *ctx,
                         "glDrawPixels(invalid PBO access)");
             return;
          }
-         buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
-                                                 GL_READ_ONLY_ARB,
-                                                 unpack->BufferObj);
+         buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0,
+						      unpack->BufferObj->Size,
+						      GL_MAP_READ_BIT,
+						      unpack->BufferObj);
          if (!buf) {
             /* buffer is already mapped - that's an error */
             _mesa_error(ctx, GL_INVALID_OPERATION,
@@ -587,10 +588,10 @@ xmesa_DrawPixels_5R6G5B( struct gl_context *ctx,
                         "glDrawPixels(invalid PBO access)");
             return;
          }
-         buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
-                                                 GL_PIXEL_UNPACK_BUFFER_EXT,
-                                                 GL_READ_ONLY_ARB,
-                                                 unpack->BufferObj);
+         buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0
+						      unpack->BufferObj->Size,
+						      GL_MAP_READ_BIT,
+						      unpack->BufferObj);
          if (!buf) {
             /* buffer is already mapped - that's an error */
             _mesa_error(ctx, GL_INVALID_OPERATION,
diff --git a/src/mesa/main/api_arrayelt.c b/src/mesa/main/api_arrayelt.c
index 8e1e3ff8dd5..b93a057e68b 100644
--- a/src/mesa/main/api_arrayelt.c
+++ b/src/mesa/main/api_arrayelt.c
@@ -1602,7 +1602,10 @@ void _ae_map_vbos( struct gl_context *ctx )
       _ae_update_state(ctx);
 
    for (i = 0; i < actx->nr_vbos; i++)
-      ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, actx->vbo[i]);
+      ctx->Driver.MapBufferRange(ctx, 0,
+				 actx->vbo[i]->Size,
+				 GL_MAP_READ_BIT,
+				 actx->vbo[i]);
 
    if (actx->nr_vbos)
       actx->mapped_vbos = GL_TRUE;
diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index 507d0ce6883..699b414f502 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -65,7 +65,8 @@ _mesa_max_buffer_index(struct gl_context *ctx, GLuint count, GLenum type,
 
    if (_mesa_is_bufferobj(elementBuf)) {
       /* elements are in a user-defined buffer object.  need to map it */
-      map = ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, elementBuf);
+      map = ctx->Driver.MapBufferRange(ctx, 0, elementBuf->Size,
+				       GL_MAP_READ_BIT, elementBuf);
       /* Actual address is the sum of pointers */
       indices = (const GLvoid *) ADD_POINTERS(map, (const GLubyte *) indices);
    }
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 47af8b59587..c453f9c8554 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -431,38 +431,6 @@ _mesa_buffer_get_subdata( struct gl_context *ctx, GLintptrARB offset,
 }
 
 
-/**
- * Default callback for \c dd_function_tabel::MapBuffer().
- *
- * The function parameters will have been already tested for errors.
- *
- * \param ctx     GL context.
- * \param target  Buffer object target on which to operate.
- * \param access  Information about how the buffer will be accessed.
- * \param bufObj  Object to be mapped.
- * \return  A pointer to the object's internal data store that can be accessed
- *          by the processor
- *
- * \sa glMapBufferARB, dd_function_table::MapBuffer
- */
-static void *
-_mesa_buffer_map( struct gl_context *ctx, GLenum access,
-		  struct gl_buffer_object *bufObj )
-{
-   (void) ctx;
-   (void) access;
-   /* Just return a direct pointer to the data */
-   if (_mesa_bufferobj_mapped(bufObj)) {
-      /* already mapped! */
-      return NULL;
-   }
-   bufObj->Pointer = bufObj->Data;
-   bufObj->Length = bufObj->Size;
-   bufObj->Offset = 0;
-   return bufObj->Pointer;
-}
-
-
 /**
  * Default fallback for \c dd_function_table::MapBufferRange().
  * Called via glMapBufferRange().
@@ -537,8 +505,10 @@ _mesa_copy_buffer_subdata(struct gl_context *ctx,
    assert(!_mesa_bufferobj_mapped(src));
    assert(!_mesa_bufferobj_mapped(dst));
 
-   srcPtr = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, src);
-   dstPtr = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_WRITE_ONLY, dst);
+   srcPtr = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, src->Size,
+						   GL_MAP_READ_BIT, src);
+   dstPtr = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, dst->Size,
+						   GL_MAP_WRITE_BIT, dst);
 
    if (srcPtr && dstPtr)
       memcpy(dstPtr + writeOffset, srcPtr + readOffset, size);
@@ -704,7 +674,6 @@ _mesa_init_buffer_object_functions(struct dd_function_table *driver)
    driver->BufferData = _mesa_buffer_data;
    driver->BufferSubData = _mesa_buffer_subdata;
    driver->GetBufferSubData = _mesa_buffer_get_subdata;
-   driver->MapBuffer = _mesa_buffer_map;
    driver->UnmapBuffer = _mesa_buffer_unmap;
 
    /* GL_ARB_map_buffer_range */
@@ -1035,8 +1004,8 @@ _mesa_MapBufferARB(GLenum target, GLenum access)
       return NULL;
    }
 
-   ASSERT(ctx->Driver.MapBuffer);
-   map = ctx->Driver.MapBuffer( ctx, access, bufObj );
+   ASSERT(ctx->Driver.MapBufferRange);
+   map = ctx->Driver.MapBufferRange(ctx, 0, bufObj->Size, accessFlags, bufObj);
    if (!map) {
       _mesa_error(ctx, GL_OUT_OF_MEMORY, "glMapBufferARB(map failed)");
       return NULL;
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 8dfea937f37..fcf40ecf102 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -706,9 +706,6 @@ struct dd_function_table {
 			     GLintptrARB offset, GLsizeiptrARB size,
 			     GLvoid *data, struct gl_buffer_object *obj );
 
-   void * (*MapBuffer)( struct gl_context *ctx, GLenum access,
-			struct gl_buffer_object *obj );
-
    void (*CopyBufferSubData)( struct gl_context *ctx,
                               struct gl_buffer_object *src,
                               struct gl_buffer_object *dst,
diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index a135810ed27..6e075b4e54b 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -894,7 +894,8 @@ unpack_image(struct gl_context *ctx, GLuint dimensions,
       GLvoid *image;
 
       map = (GLubyte *)
-         ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, unpack->BufferObj);
+         ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size,
+				    GL_MAP_READ_BIT, unpack->BufferObj);
       if (!map) {
          /* unable to map src buffer! */
          _mesa_error(ctx, GL_INVALID_OPERATION, "unable to map PBO");
diff --git a/src/mesa/main/pbo.c b/src/mesa/main/pbo.c
index ce362b9e444..4e7e6f925cc 100644
--- a/src/mesa/main/pbo.c
+++ b/src/mesa/main/pbo.c
@@ -128,9 +128,10 @@ _mesa_map_pbo_source(struct gl_context *ctx,
 
    if (_mesa_is_bufferobj(unpack->BufferObj)) {
       /* unpack from PBO */
-      buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
-                                              GL_READ_ONLY_ARB,
-                                              unpack->BufferObj);
+      buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0,
+						   unpack->BufferObj->Size,
+						   GL_MAP_READ_BIT,
+						   unpack->BufferObj);
       if (!buf)
          return NULL;
 
@@ -223,9 +224,10 @@ _mesa_map_pbo_dest(struct gl_context *ctx,
 
    if (_mesa_is_bufferobj(pack->BufferObj)) {
       /* pack into PBO */
-      buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
-                                              GL_WRITE_ONLY_ARB,
-                                              pack->BufferObj);
+      buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0,
+						   pack->BufferObj->Size,
+						   GL_MAP_WRITE_BIT,
+						   pack->BufferObj);
       if (!buf)
          return NULL;
 
@@ -326,8 +328,9 @@ _mesa_validate_pbo_teximage(struct gl_context *ctx, GLuint dimensions,
       return NULL;
    }
 
-   buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB,
-					   unpack->BufferObj);
+   buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size,
+						GL_MAP_READ_BIT,
+						unpack->BufferObj);
    if (!buf) {
       _mesa_error(ctx, GL_INVALID_OPERATION, funcName, "(PBO is mapped)");
       return NULL;
@@ -363,7 +366,10 @@ _mesa_validate_pbo_compressed_teximage(struct gl_context *ctx,
       return NULL;
    }
 
-   buf = (GLubyte*) ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, packing->BufferObj);
+   buf = (GLubyte*) ctx->Driver.MapBufferRange(ctx, 0,
+					       packing->BufferObj->Size,
+					       GL_MAP_READ_BIT,
+					       packing->BufferObj);
    if (!buf) {
       _mesa_error(ctx, GL_INVALID_OPERATION, funcName, "(PBO is mapped");
       return NULL;
diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index a54da7160c7..b2ebb0de475 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -441,7 +441,8 @@ _mesa_get_teximage(struct gl_context *ctx, GLenum target, GLint level,
        * texture data to the PBO if the PBO is in VRAM along with the texture.
        */
       GLubyte *buf = (GLubyte *)
-         ctx->Driver.MapBuffer(ctx, GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj);
+         ctx->Driver.MapBufferRange(ctx, 0, ctx->Pack.BufferObj->Size,
+				    GL_MAP_WRITE_BIT, ctx->Pack.BufferObj);
       if (!buf) {
          /* out of memory or other unexpected error */
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage(map PBO failed)");
@@ -498,7 +499,8 @@ _mesa_get_compressed_teximage(struct gl_context *ctx, GLenum target, GLint level
    if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
       /* pack texture image into a PBO */
       GLubyte *buf = (GLubyte *)
-         ctx->Driver.MapBuffer(ctx, GL_WRITE_ONLY_ARB, ctx->Pack.BufferObj);
+         ctx->Driver.MapBufferRange(ctx, 0, ctx->Pack.BufferObj->Size,
+				    GL_MAP_WRITE_BIT, ctx->Pack.BufferObj);
       if (!buf) {
          /* out of memory or other unexpected error */
          _mesa_error(ctx, GL_OUT_OF_MEMORY,
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c
index dd2eca7bc96..a451b44049e 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -235,48 +235,6 @@ static long st_bufferobj_zero_length = 0;
 
 
 
-/**
- * Called via glMapBufferARB().
- */
-static void *
-st_bufferobj_map(struct gl_context *ctx, GLenum access,
-                 struct gl_buffer_object *obj)
-{
-   struct st_buffer_object *st_obj = st_buffer_object(obj);
-   uint flags;
-
-   switch (access) {
-   case GL_WRITE_ONLY:
-      flags = PIPE_TRANSFER_WRITE;
-      break;
-   case GL_READ_ONLY:
-      flags = PIPE_TRANSFER_READ;
-      break;
-   case GL_READ_WRITE:
-   default:
-      flags = PIPE_TRANSFER_READ_WRITE;
-      break;      
-   }
-
-   /* Handle zero-size buffers here rather than in drivers */
-   if (obj->Size == 0) {
-      obj->Pointer = &st_bufferobj_zero_length;
-   }
-   else {
-      obj->Pointer = pipe_buffer_map(st_context(ctx)->pipe,
-                                     st_obj->buffer,
-                                     flags,
-                                     &st_obj->transfer);
-   }
-
-   if (obj->Pointer) {
-      obj->Offset = 0;
-      obj->Length = obj->Size;
-   }
-   return obj->Pointer;
-}
-
-
 /**
  * Called via glMapBufferRange().
  */
@@ -442,7 +400,6 @@ st_init_bufferobject_functions(struct dd_function_table *functions)
    functions->BufferData = st_bufferobj_data;
    functions->BufferSubData = st_bufferobj_subdata;
    functions->GetBufferSubData = st_bufferobj_get_subdata;
-   functions->MapBuffer = st_bufferobj_map;
    functions->MapBufferRange = st_bufferobj_map_range;
    functions->FlushMappedBufferRange = st_bufferobj_flush_mapped_range;
    functions->UnmapBuffer = st_bufferobj_unmap;
diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c
index 7351f6f3be6..a23d1754391 100644
--- a/src/mesa/tnl/t_draw.c
+++ b/src/mesa/tnl/t_draw.c
@@ -280,9 +280,9 @@ static void bind_inputs( struct gl_context *ctx,
 	 if (!inputs[i]->BufferObj->Pointer) {
 	    bo[*nr_bo] = inputs[i]->BufferObj;
 	    (*nr_bo)++;
-	    ctx->Driver.MapBuffer(ctx, 
-				  GL_READ_ONLY_ARB,
-				  inputs[i]->BufferObj);
+	    ctx->Driver.MapBufferRange(ctx, 0, inputs[i]->BufferObj->Size,
+				       GL_MAP_READ_BIT,
+				       inputs[i]->BufferObj);
 	    
 	    assert(inputs[i]->BufferObj->Pointer);
 	 }
@@ -349,9 +349,8 @@ static void bind_indices( struct gl_context *ctx,
    if (ib->obj->Name && !ib->obj->Pointer) {
       bo[*nr_bo] = ib->obj;
       (*nr_bo)++;
-      ctx->Driver.MapBuffer(ctx, 
-			    GL_READ_ONLY_ARB,
-			    ib->obj);
+      ctx->Driver.MapBufferRange(ctx, 0, ib->obj->Size, GL_MAP_READ_BIT,
+				 ib->obj);
 
       assert(ib->obj->Pointer);
    }
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 8359a7f1529..484e1a85e41 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -96,7 +96,8 @@ vbo_get_minmax_index(struct gl_context *ctx,
 
    if (_mesa_is_bufferobj(ib->obj)) {
       const GLvoid *map =
-         ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, ib->obj);
+         ctx->Driver.MapBufferRange(ctx, 0, ib->obj->Size, GL_MAP_READ_BIT,
+				    ib->obj);
       indices = ADD_POINTERS(map, ib->ptr);
    } else {
       indices = ib->ptr;
@@ -195,7 +196,8 @@ check_array_data(struct gl_context *ctx, struct gl_client_array *array,
          if (!array->BufferObj->Pointer) {
             /* need to map now */
             array->BufferObj->Pointer =
-               ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, array->BufferObj);
+               ctx->Driver.MapBufferRange(ctx, 0, array->BufferObj->Size,
+					  GL_MAP_READ_BIT, array->BufferObj);
          }
          data = ADD_POINTERS(data, array->BufferObj->Pointer);
       }
@@ -254,9 +256,10 @@ check_draw_elements_data(struct gl_context *ctx, GLsizei count, GLenum elemType,
    GLint i, k;
 
    if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) {
-      elemMap = ctx->Driver.MapBuffer(ctx,
-                                      GL_READ_ONLY,
-                                      ctx->Array.ElementArrayBufferObj);
+      elemMap = ctx->Driver.MapBufferRange(ctx, 0,
+					   ctx->Array.ElementArrayBufferObj->Size,
+					   GL_MAP_READ_BIT,
+					   ctx->Array.ElementArrayBufferObj);
       elements = ADD_POINTERS(elements, elemMap);
    }
 
@@ -347,7 +350,8 @@ print_draw_arrays(struct gl_context *ctx,
 	     bufName);
 
       if (bufName) {
-         GLubyte *p = ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, bufObj);
+         GLubyte *p = ctx->Driver.MapBufferRange(ctx, 0, bufObj->Size,
+						 GL_MAP_READ_BIT, bufObj);
          int offset = (int) (GLintptr) exec->array.inputs[i]->Ptr;
          float *f = (float *) (p + offset);
          int *k = (int *) f;
@@ -710,9 +714,11 @@ vbo_exec_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count,
 static void
 dump_element_buffer(struct gl_context *ctx, GLenum type)
 {
-   const GLvoid *map = ctx->Driver.MapBuffer(ctx,
-                                             GL_READ_ONLY,
-                                             ctx->Array.ElementArrayBufferObj);
+   const GLvoid *map =
+      ctx->Driver.MapBufferRange(ctx, 0,
+				 ctx->Array.ElementArrayBufferObj->Size,
+				 GL_MAP_READ_BIT,
+				 ctx->Array.ElementArrayBufferObj);
    switch (type) {
    case GL_UNSIGNED_BYTE:
       {
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 2dc60661796..25436c6d6d2 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -296,7 +296,6 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
 {
    struct gl_context *ctx = exec->ctx;
    const GLenum target = GL_ARRAY_BUFFER_ARB;
-   const GLenum access = GL_READ_WRITE_ARB; /* for MapBuffer */
    const GLenum accessRange = GL_MAP_WRITE_BIT |  /* for MapBufferRange */
                               GL_MAP_INVALIDATE_RANGE_BIT |
                               GL_MAP_UNSYNCHRONIZED_BIT |
@@ -310,8 +309,7 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
    assert(!exec->vtx.buffer_map);
    assert(!exec->vtx.buffer_ptr);
 
-   if (VBO_VERT_BUFFER_SIZE > exec->vtx.buffer_used + 1024 &&
-       ctx->Driver.MapBufferRange) {
+   if (VBO_VERT_BUFFER_SIZE > exec->vtx.buffer_used + 1024) {
       /* The VBO exists and there's room for more */
       exec->vtx.buffer_map = 
          (GLfloat *)ctx->Driver.MapBufferRange(ctx, 
@@ -332,15 +330,11 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
                              NULL, usage, exec->vtx.bufferobj);
 
 
-      if (ctx->Driver.MapBufferRange)
-         exec->vtx.buffer_map = 
-            (GLfloat *)ctx->Driver.MapBufferRange(ctx,
-                                                  0, VBO_VERT_BUFFER_SIZE,
-                                                  accessRange,
-                                                  exec->vtx.bufferobj);
-      if (!exec->vtx.buffer_map)
-         exec->vtx.buffer_map =
-            (GLfloat *)ctx->Driver.MapBuffer(ctx, access, exec->vtx.bufferobj);
+      exec->vtx.buffer_map =
+	 (GLfloat *)ctx->Driver.MapBufferRange(ctx,
+					       0, VBO_VERT_BUFFER_SIZE,
+					       accessRange,
+					       exec->vtx.bufferobj);
       assert(exec->vtx.buffer_map);
       exec->vtx.buffer_ptr = exec->vtx.buffer_map;
    }
diff --git a/src/mesa/vbo/vbo_rebase.c b/src/mesa/vbo/vbo_rebase.c
index e10908d5ece..a1eab752ad6 100644
--- a/src/mesa/vbo/vbo_rebase.c
+++ b/src/mesa/vbo/vbo_rebase.c
@@ -159,7 +159,8 @@ void vbo_rebase_prims( struct gl_context *ctx,
       void *ptr;
 
       if (map_ib) 
-	 ctx->Driver.MapBuffer(ctx, GL_READ_ONLY_ARB, ib->obj);
+	 ctx->Driver.MapBufferRange(ctx, 0, ib->obj->Size, GL_MAP_READ_BIT,
+				    ib->obj);
 
 
       ptr = ADD_POINTERS(ib->obj->Pointer, ib->ptr);
diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
index f90f00c5aae..ad36e93329c 100644
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -232,10 +232,10 @@ map_vertex_store(struct gl_context *ctx,
    assert(vertex_store->bufferobj);
    assert(!vertex_store->buffer);
    vertex_store->buffer =
-      (GLfloat *) ctx->Driver.MapBuffer(ctx,
-                                        GL_WRITE_ONLY,      /* not used */
-                                        vertex_store->
-                                        bufferobj);
+      (GLfloat *) ctx->Driver.MapBufferRange(ctx, 0,
+					     vertex_store->bufferobj->Size,
+					     GL_MAP_WRITE_BIT,    /* not used */
+					     vertex_store->bufferobj);
 
    assert(vertex_store->buffer);
    return vertex_store->buffer + vertex_store->used;
diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
index e7996f29307..6cda831aa85 100644
--- a/src/mesa/vbo/vbo_save_draw.c
+++ b/src/mesa/vbo/vbo_save_draw.c
@@ -217,9 +217,11 @@ static void
 vbo_save_loopback_vertex_list(struct gl_context *ctx,
                               const struct vbo_save_vertex_list *list)
 {
-   const char *buffer = ctx->Driver.MapBuffer(ctx, 
-					      GL_READ_ONLY, /* ? */
-                                              list->vertex_store->bufferobj);
+   const char *buffer =
+      ctx->Driver.MapBufferRange(ctx, 0,
+				 list->vertex_store->bufferobj->Size,
+				 GL_MAP_READ_BIT, /* ? */
+				 list->vertex_store->bufferobj);
 
    vbo_loopback_vertex_list(ctx,
                             (const GLfloat *)(buffer + list->buffer_offset),
diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c
index 8dc5aa0ed76..40906e38917 100644
--- a/src/mesa/vbo/vbo_split_copy.c
+++ b/src/mesa/vbo/vbo_split_copy.c
@@ -444,7 +444,7 @@ replay_init( struct copy_context *copy )
 	 copy->vertex_size += attr_size(copy->array[i]);
       
 	 if (_mesa_is_bufferobj(vbo) && !_mesa_bufferobj_mapped(vbo)) 
-	    ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, vbo);
+	    ctx->Driver.MapBufferRange(ctx, 0, vbo->Size, GL_MAP_READ_BIT, vbo);
 
 	 copy->varying[j].src_ptr = ADD_POINTERS(vbo->Pointer,
 						 copy->array[i]->Ptr);
@@ -459,7 +459,8 @@ replay_init( struct copy_context *copy )
     */
    if (_mesa_is_bufferobj(copy->ib->obj) &&
        !_mesa_bufferobj_mapped(copy->ib->obj)) 
-      ctx->Driver.MapBuffer(ctx, GL_READ_ONLY, copy->ib->obj);
+      ctx->Driver.MapBufferRange(ctx, 0, copy->ib->obj->Size, GL_MAP_READ_BIT,
+				 copy->ib->obj);
 
    srcptr = (const GLubyte *) ADD_POINTERS(copy->ib->obj->Pointer,
                                            copy->ib->ptr);

From 655c7d7498390ab69623e308abe5db4a8489a25c Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 22 Aug 2011 00:14:51 -0700
Subject: [PATCH 537/600] mesa: Only map the necessary buffer range in
 vbo_get_minmax_index

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/vbo/vbo_exec_array.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 484e1a85e41..18719d5f537 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -95,10 +95,25 @@ vbo_get_minmax_index(struct gl_context *ctx,
    GLuint i;
 
    if (_mesa_is_bufferobj(ib->obj)) {
-      const GLvoid *map =
-         ctx->Driver.MapBufferRange(ctx, 0, ib->obj->Size, GL_MAP_READ_BIT,
-				    ib->obj);
-      indices = ADD_POINTERS(map, ib->ptr);
+      unsigned map_size;
+
+      switch (ib->type) {
+      case GL_UNSIGNED_INT:
+	 map_size = count * sizeof(GLuint);
+	 break;
+      case GL_UNSIGNED_SHORT:
+	 map_size = count * sizeof(GLushort);
+	 break;
+      case GL_UNSIGNED_BYTE:
+	 map_size = count * sizeof(GLubyte);
+	 break;
+      default:
+	 assert(0);
+	 map_size = 0;
+      }
+
+      indices = ctx->Driver.MapBufferRange(ctx, (GLsizeiptr) ib->ptr, map_size,
+					   GL_MAP_READ_BIT, ib->obj);
    } else {
       indices = ib->ptr;
    }

From 2ea1ff38164d95f8291ef2e5dfe2cb13936a60f2 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 22 Aug 2011 00:31:19 -0700
Subject: [PATCH 538/600] tnl: Only map the necessary buffer range in
 bind_indices

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/tnl/t_draw.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c
index a23d1754391..86af4b7cfe2 100644
--- a/src/mesa/tnl/t_draw.c
+++ b/src/mesa/tnl/t_draw.c
@@ -347,16 +347,32 @@ static void bind_indices( struct gl_context *ctx,
    }
 
    if (ib->obj->Name && !ib->obj->Pointer) {
+      unsigned map_size;
+
+      switch (ib->type) {
+      case GL_UNSIGNED_BYTE:
+	 map_size = ib->count * sizeof(GLubyte);
+	 break;
+      case GL_UNSIGNED_SHORT:
+	 map_size = ib->count * sizeof(GLushort);
+	 break;
+      case GL_UNSIGNED_INT:
+	 map_size = ib->count * sizeof(GLuint);
+	 break;
+      default:
+	 assert(0);
+	 map_size = 0;
+      }
+
       bo[*nr_bo] = ib->obj;
       (*nr_bo)++;
-      ctx->Driver.MapBufferRange(ctx, 0, ib->obj->Size, GL_MAP_READ_BIT,
-				 ib->obj);
-
+      ptr = ctx->Driver.MapBufferRange(ctx, (GLsizeiptr) ib->ptr, map_size,
+				       GL_MAP_READ_BIT, ib->obj);
       assert(ib->obj->Pointer);
+   } else {
+      ptr = ib->ptr;
    }
 
-   ptr = ADD_POINTERS(ib->obj->Pointer, ib->ptr);
-
    if (ib->type == GL_UNSIGNED_INT && VB->Primitive[0].basevertex == 0) {
       VB->Elts = (GLuint *) ptr;
    }

From 0d636213d491f88726155e12c3b445a88e0f1cd8 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 22 Aug 2011 10:52:47 -0700
Subject: [PATCH 539/600] i965: Only map the necessary buffer range in
 brw_prepare_indices

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/drivers/dri/i965/brw_draw_upload.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index ed1964f5a6f..7bc69c612e3 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -690,11 +690,10 @@ static void brw_prepare_indices(struct brw_context *brw)
        */
        if ((get_size(index_buffer->type) - 1) & offset) {
            GLubyte *map = ctx->Driver.MapBufferRange(ctx,
-						     0,
-						     bufferobj->Size,
+						     offset,
+						     ib_size,
 						     GL_MAP_WRITE_BIT,
 						     bufferobj);
-           map += offset;
 
 	   intel_upload_data(&brw->intel, map, ib_size, ib_type_size,
 			     &bo, &offset);

From 9bda86c3550090f5370ef2d010e659758dc9a540 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Sun, 21 Aug 2011 11:58:30 +0800
Subject: [PATCH 540/600] auxiliary/os: add wrappers for mmap/munmap

The use of mmap() in winsys requires large file support.  Not all OSes
have LFS so a wrapper should be used.  In particular, os_mmap() should
call __mmap2() on Android.
---
 src/gallium/auxiliary/os/os_mman.h | 87 ++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)
 create mode 100644 src/gallium/auxiliary/os/os_mman.h

diff --git a/src/gallium/auxiliary/os/os_mman.h b/src/gallium/auxiliary/os/os_mman.h
new file mode 100644
index 00000000000..b48eb053023
--- /dev/null
+++ b/src/gallium/auxiliary/os/os_mman.h
@@ -0,0 +1,87 @@
+/**************************************************************************
+ *
+ * Copyright 2011 LunarG, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * OS independent memory mapping (with large file support).
+ *
+ * @author Chia-I Wu <olvaffe@gmail.com>
+ */
+
+#ifndef _OS_MMAN_H_
+#define _OS_MMAN_H_
+
+
+#include "pipe/p_config.h"
+#include "pipe/p_compiler.h"
+
+#if defined(PIPE_OS_UNIX)
+#  ifndef _FILE_OFFSET_BITS
+#    error _FILE_OFFSET_BITS must be defined to 64
+#  endif
+#  include <sys/mman.h>
+#else
+#  error Unsupported OS
+#endif
+
+#if defined(PIPE_OS_ANDROID)
+#  include <errno.h> /* for EINVAL */
+#endif
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+
+#if defined(PIPE_OS_ANDROID)
+
+extern void *__mmap2(void *, size_t, int, int, int, size_t);
+
+static INLINE void *os_mmap(void *addr, size_t length, int prot, int flags, int fd, loff_t offset)
+{
+   /* offset must be aligned to 4096 (not necessarily the page size) */
+   if (unlikely(offset & 4095)) {
+      errno = EINVAL;
+      return MAP_FAILED;
+   }
+
+   return __mmap2(addr, length, prot, flags, fd, (size_t) (offset >> 12));
+}
+
+#else
+/* assume large file support exists */
+#  define os_mmap(addr, length, prot, flags, fd, offset) mmap(addr, length, prot, flags, fd, offset)
+#endif
+
+#define os_munmap(addr, length) munmap(addr, length)
+
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _OS_MMAN_H_ */

From 70b1837dfb1b282ad9efcaeec4f9c8da5f9a74d8 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olv@lunarg.com>
Date: Sun, 21 Aug 2011 12:31:45 +0800
Subject: [PATCH 541/600] winsys/radeon: use os_mmap() for memory mapping
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

os_mmap() guarantees large file support across OSes.

Reviewed-by: Marek Olšák <maraeo@gmail.com>
---
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index adfbefd897b..b45efe5f49c 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -31,11 +31,11 @@
 #include "util/u_memory.h"
 #include "util/u_simple_list.h"
 #include "os/os_thread.h"
+#include "os/os_mman.h"
 
 #include "state_tracker/drm_driver.h"
 
 #include <sys/ioctl.h>
-#include <sys/mman.h>
 #include <xf86drm.h>
 #include <errno.h>
 
@@ -160,7 +160,7 @@ static void radeon_bo_destroy(struct pb_buffer *_buf)
     }
 
     if (bo->ptr)
-        munmap(bo->ptr, bo->size);
+        os_munmap(bo->ptr, bo->size);
 
     /* Close object. */
     args.handle = bo->handle;
@@ -278,7 +278,7 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf,
         return NULL;
     }
 
-    ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
+    ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
                bo->rws->fd, args.addr_ptr);
     if (ptr == MAP_FAILED) {
         pipe_mutex_unlock(bo->map_mutex);

From 1284d5b25507a56634519ac385cbc00a00b94417 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Sun, 21 Aug 2011 20:37:47 +0800
Subject: [PATCH 542/600] winsys/svga: use os_mmap() for memory mapping

os_mmap() guarantees large file support across OSes.
---
 src/gallium/winsys/svga/drm/vmw_screen_ioctl.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
index d92ba389d35..afdbd44458d 100644
--- a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
+++ b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
@@ -42,7 +42,8 @@
 #include "xf86drm.h"
 #include "vmwgfx_drm.h"
 
-#include <sys/mman.h>
+#include "os/os_mman.h"
+
 #include <errno.h>
 #include <unistd.h>
 
@@ -94,7 +95,7 @@ static void
 vmw_ioctl_fifo_unmap(struct vmw_winsys_screen *vws, void *mapping)
 {
    VMW_FUNC;
-   (void)munmap(mapping, getpagesize());
+   (void)os_munmap(mapping, getpagesize());
 }
 
 
@@ -106,7 +107,7 @@ vmw_ioctl_fifo_map(struct vmw_winsys_screen *vws,
 
    VMW_FUNC;
 
-   map = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED,
+   map = os_mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED,
 	      vws->ioctl.drm_fd, fifo_offset);
 
    if (map == MAP_FAILED) {
@@ -362,7 +363,7 @@ vmw_ioctl_region_destroy(struct vmw_region *region)
               region->ptr.gmrId, region->ptr.offset);
 
    if (region->data) {
-      munmap(region->data, region->size);
+      os_munmap(region->data, region->size);
       region->data = NULL;
    }
 
@@ -388,7 +389,7 @@ vmw_ioctl_region_map(struct vmw_region *region)
               region->ptr.gmrId, region->ptr.offset);
 
    if (region->data == NULL) {
-      map = mmap(NULL, region->size, PROT_READ | PROT_WRITE, MAP_SHARED,
+      map = os_mmap(NULL, region->size, PROT_READ | PROT_WRITE, MAP_SHARED,
 		 region->drm_fd, region->map_handle);
       if (map == MAP_FAILED) {
 	 debug_printf("%s: Map failed.\n", __FUNCTION__);

From cc9a8915f093c57d2748370d18ed47f66c933013 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 24 Aug 2011 13:27:06 +0100
Subject: [PATCH 543/600] r600g: fill out missing entries in opcode tables.

this just adds the missing opcodes as unsupported.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/r600/r600_shader.c | 36 ++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 406e87bdb00..c37bb729ce3 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -3366,6 +3366,18 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_CASE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_DEFAULT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_ENDSWITCH,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_LOAD,      0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_LOAD_MS,   0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_RESINFO,	0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
 	{TGSI_OPCODE_LAST,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 };
 
@@ -3524,6 +3536,18 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_LOAD,      0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_LOAD_MS,   0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_RESINFO,	0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
 	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 };
 
@@ -3682,5 +3706,17 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_CASE,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_DEFAULT,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_ENDSWITCH,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_LOAD,      0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_LOAD_MS,   0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_RESINFO,	0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
+	{TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
 	{TGSI_OPCODE_LAST,	0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 };

From c3ad95ed40fca72dbc6c157de2948cb6d074aaac Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Wed, 24 Aug 2011 07:55:04 -0600
Subject: [PATCH 544/600] x11: add missing comma to fix compilation

---
 src/mesa/drivers/x11/xm_dd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c
index 3a5d0ae04fc..81d000b3952 100644
--- a/src/mesa/drivers/x11/xm_dd.c
+++ b/src/mesa/drivers/x11/xm_dd.c
@@ -588,7 +588,7 @@ xmesa_DrawPixels_5R6G5B( struct gl_context *ctx,
                         "glDrawPixels(invalid PBO access)");
             return;
          }
-         buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0
+         buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0,
 						      unpack->BufferObj->Size,
 						      GL_MAP_READ_BIT,
 						      unpack->BufferObj);

From a231d245514b43cf547ccb996b68efc0186e6821 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 14 Jul 2011 20:57:34 -0600
Subject: [PATCH 545/600] mesa: add gl_texture_image::Face, Level fields

Several drivers have these fields in their subclasses of gl_texture_image.
They'll be useful for core Mesa too...

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/main/mtypes.h   | 3 +++
 src/mesa/main/teximage.c | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 8b3650321db..f2eb889feb4 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1279,6 +1279,9 @@ struct gl_texture_image
    GLboolean _IsPowerOfTwo;	/**< Are all dimensions powers of two? */
 
    struct gl_texture_object *TexObject;  /**< Pointer back to parent object */
+   GLuint Level;                /**< Which mipmap level am I? */
+   /** Cube map face: index into gl_texture_object::Image[] array */
+   GLuint Face;
 
    FetchTexelFuncC FetchTexelc;	/**< GLchan texel fetch function pointer */
    FetchTexelFuncF FetchTexelf;	/**< Float texel fetch function pointer */
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 27717cfb0f5..a005d2935fa 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -571,6 +571,8 @@ _mesa_set_tex_image(struct gl_texture_object *tObj,
 
    /* Set the 'back' pointer */
    texImage->TexObject = tObj;
+   texImage->Level = level;
+   texImage->Face = face;
 }
 
 

From 7dae1aaf142999e3cfeafb13d30abda667d66d87 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 14 Jul 2011 20:57:34 -0600
Subject: [PATCH 546/600] intel: use new gl_texture_image:Face, Level fields

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/drivers/dri/intel/intel_blit.c       |  4 +--
 src/mesa/drivers/dri/intel/intel_fbo.c        | 12 +++----
 .../drivers/dri/intel/intel_mipmap_tree.c     |  2 +-
 src/mesa/drivers/dri/intel/intel_tex.c        |  5 ---
 src/mesa/drivers/dri/intel/intel_tex_copy.c   |  4 +--
 src/mesa/drivers/dri/intel/intel_tex_image.c  | 35 ++++++++-----------
 src/mesa/drivers/dri/intel/intel_tex_obj.h    |  5 ---
 .../drivers/dri/intel/intel_tex_subimage.c    |  6 ++--
 .../drivers/dri/intel/intel_tex_validate.c    | 12 +++----
 9 files changed, 34 insertions(+), 51 deletions(-)

diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c
index 30be1b9382f..b18dd2922d9 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.c
+++ b/src/mesa/drivers/dri/intel/intel_blit.c
@@ -541,8 +541,8 @@ intel_set_teximage_alpha_to_one(struct gl_context *ctx,
 
    /* get dest x/y in destination texture */
    intel_miptree_get_image_offset(intel_image->mt,
-				  intel_image->level,
-				  intel_image->face,
+				  intel_image->base.Level,
+				  intel_image->base.Face,
 				  0,
 				  &image_x, &image_y);
 
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 65ad621e770..754f9f202d1 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -600,8 +600,8 @@ intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb,
 
    /* compute offset of the particular 2D image within the texture region */
    intel_miptree_get_image_offset(intel_image->mt,
-				  intel_image->level,
-				  intel_image->face,
+				  intel_image->base.Level,
+				  intel_image->base.Face,
 				  zoffset,
 				  &dst_x, &dst_y);
 
@@ -728,8 +728,8 @@ intel_render_texture(struct gl_context * ctx,
 
       new_mt = intel_miptree_create(intel, image->TexObject->Target,
 				    intel_image->base.TexFormat,
-				    intel_image->level,
-				    intel_image->level,
+				    intel_image->base.Level,
+				    intel_image->base.Level,
 				    intel_image->base.Width,
 				    intel_image->base.Height,
 				    intel_image->base.Depth,
@@ -737,8 +737,8 @@ intel_render_texture(struct gl_context * ctx,
 
       intel_miptree_image_copy(intel,
                                new_mt,
-                               intel_image->face,
-			       intel_image->level,
+			       intel_image->base.Face,
+			       intel_image->base.Level,
 			       old_mt);
 
       intel_miptree_release(intel, &intel_image->mt);
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index 4e711de1ce1..f36240d7f1d 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -227,7 +227,7 @@ intel_miptree_match_image(struct intel_mipmap_tree *mt,
                           struct gl_texture_image *image)
 {
    struct intel_texture_image *intelImage = intel_texture_image(image);
-   GLuint level = intelImage->level;
+   GLuint level = intelImage->base.Level;
 
    /* Images with borders are never pulled into mipmap trees. */
    if (image->Border)
diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c
index 21c4a1dddba..ee0cd252375 100644
--- a/src/mesa/drivers/dri/intel/intel_tex.c
+++ b/src/mesa/drivers/dri/intel/intel_tex.c
@@ -95,17 +95,12 @@ intelGenerateMipmap(struct gl_context *ctx, GLenum target,
       if (!_mesa_is_format_compressed(first_image->TexFormat)) {
          GLuint nr_faces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
          GLuint face, i;
-         /* Update the level information in our private data in the new images,
-          * since it didn't get set as part of a normal TexImage path.
-          */
          for (face = 0; face < nr_faces; face++) {
             for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) {
                struct intel_texture_image *intelImage =
                   intel_texture_image(texObj->Image[face][i]);
                if (!intelImage)
                   break;
-               intelImage->level = i;
-               intelImage->face = face;
                /* Unreference the miptree to signal that the new Data is a
                 * bare pointer from mesa.
                 */
diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c
index e89e91dee3e..600bd1251e0 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -118,8 +118,8 @@ intel_copy_texsubimage(struct intel_context *intel,
 
       /* get dest x/y in destination texture */
       intel_miptree_get_image_offset(intelImage->mt,
-				     intelImage->level,
-				     intelImage->face,
+				     intelImage->base.Level,
+				     intelImage->base.Face,
 				     0,
 				     &image_x, &image_y);
 
diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c
index 1f8b885bbec..4ee66847255 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_image.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_image.c
@@ -63,7 +63,7 @@ intel_miptree_create_for_teximage(struct intel_context *intel,
    if (intelImage->base.Border)
       return NULL;
 
-   if (intelImage->level > intelObj->base.BaseLevel &&
+   if (intelImage->base.Level > intelObj->base.BaseLevel &&
        (intelImage->base.Width == 1 ||
         (intelObj->base.Target != GL_TEXTURE_1D &&
          intelImage->base.Height == 1) ||
@@ -74,19 +74,19 @@ intel_miptree_create_for_teximage(struct intel_context *intel,
        * likely base level width/height/depth for a full mipmap stack
        * from this info, so just allocate this one level.
        */
-      firstLevel = intelImage->level;
-      lastLevel = intelImage->level;
+      firstLevel = intelImage->base.Level;
+      lastLevel = intelImage->base.Level;
    } else {
       /* If this image disrespects BaseLevel, allocate from level zero.
        * Usually BaseLevel == 0, so it's unlikely to happen.
        */
-      if (intelImage->level < intelObj->base.BaseLevel)
+      if (intelImage->base.Level < intelObj->base.BaseLevel)
 	 firstLevel = 0;
       else
 	 firstLevel = intelObj->base.BaseLevel;
 
       /* Figure out image dimensions at start level. */
-      for (i = intelImage->level; i > firstLevel; i--) {
+      for (i = intelImage->base.Level; i > firstLevel; i--) {
 	 width <<= 1;
 	 if (height != 1)
 	    height <<= 1;
@@ -101,7 +101,7 @@ intel_miptree_create_for_teximage(struct intel_context *intel,
        */
       if ((intelObj->base.Sampler.MinFilter == GL_NEAREST ||
 	   intelObj->base.Sampler.MinFilter == GL_LINEAR) &&
-	  intelImage->level == firstLevel &&
+	  intelImage->base.Level == firstLevel &&
 	  (intel->gen < 4 || firstLevel == 0)) {
 	 lastLevel = firstLevel;
       } else {
@@ -186,8 +186,8 @@ try_pbo_upload(struct intel_context *intel,
    else
       src_stride = width;
 
-   intel_miptree_get_image_offset(intelImage->mt, intelImage->level,
-				  intelImage->face, 0,
+   intel_miptree_get_image_offset(intelImage->mt, intelImage->base.Level,
+				  intelImage->base.Face, 0,
 				  &dst_x, &dst_y);
 
    dst_stride = intelImage->mt->region->pitch;
@@ -243,8 +243,8 @@ try_pbo_zcopy(struct intel_context *intel,
    else
       src_stride = width;
 
-   intel_miptree_get_image_offset(intelImage->mt, intelImage->level,
-				  intelImage->face, 0,
+   intel_miptree_get_image_offset(intelImage->mt, intelImage->base.Level,
+				  intelImage->base.Face, 0,
 				  &dst_x, &dst_y);
 
    dst_stride = intelImage->mt->region->pitch;
@@ -407,9 +407,6 @@ intelTexImage(struct gl_context * ctx,
    DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__,
        _mesa_lookup_enum_by_nr(target), level, width, height, depth, border);
 
-   intelImage->face = _mesa_tex_target_to_face(target);
-   intelImage->level = level;
-
    if (_mesa_is_format_compressed(texImage->TexFormat)) {
       texelBytes = 0;
    }
@@ -514,8 +511,8 @@ intelTexImage(struct gl_context * ctx,
 	 }
          texImage->Data = intel_miptree_image_map(intel,
                                                   intelImage->mt,
-                                                  intelImage->face,
-                                                  intelImage->level,
+                                                  intelImage->base.Face,
+                                                  intelImage->base.Level,
                                                   &dstRowStride,
                                                   intelImage->base.ImageOffsets);
       }
@@ -684,8 +681,8 @@ intel_get_tex_image(struct gl_context * ctx, GLenum target, GLint level,
       intelImage->base.Data =
          intel_miptree_image_map(intel,
                                  intelImage->mt,
-                                 intelImage->face,
-                                 intelImage->level,
+                                 intelImage->base.Face,
+                                 intelImage->base.Level,
                                  &intelImage->base.RowStride,
                                  intelImage->base.ImageOffsets);
       intelImage->base.RowStride /= intelImage->mt->cpp;
@@ -816,8 +813,6 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
 			      rb->region->width, rb->region->height, 1,
 			      0, internalFormat, texFormat);
 
-   intelImage->face = _mesa_tex_target_to_face(target);
-   intelImage->level = level;
    texImage->RowStride = rb->region->pitch;
    intel_miptree_reference(&intelImage->mt, intelObj->mt);
 
@@ -874,8 +869,6 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target,
 			      image->region->width, image->region->height, 1,
 			      0, image->internal_format, image->format);
 
-   intelImage->face = _mesa_tex_target_to_face(target);
-   intelImage->level = 0;
    texImage->RowStride = image->region->pitch;
    intel_miptree_reference(&intelImage->mt, intelObj->mt);
 
diff --git a/src/mesa/drivers/dri/intel/intel_tex_obj.h b/src/mesa/drivers/dri/intel/intel_tex_obj.h
index a9ae2ec5429..e7a4318b8d8 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_obj.h
+++ b/src/mesa/drivers/dri/intel/intel_tex_obj.h
@@ -52,11 +52,6 @@ struct intel_texture_image
 {
    struct gl_texture_image base;
 
-   /* These aren't stored in gl_texture_image 
-    */
-   GLuint level;
-   GLuint face;
-
    /* If intelImage->mt != NULL, image data is stored here.
     * Else if intelImage->base.Data != NULL, image is stored there.
     * Else there is no image data.
diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
index 8b43c406cf9..5fd2cc36234 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
@@ -113,7 +113,7 @@ intelTexSubimage(struct gl_context * ctx,
 	 dstRowStride = pitch;
 
 	 intel_miptree_get_image_offset(intelImage->mt, level,
-					intelImage->face, 0,
+					intelImage->base.Face, 0,
 					&blit_x, &blit_y);
 	 blit_x += xoffset;
 	 blit_y += yoffset;
@@ -122,8 +122,8 @@ intelTexSubimage(struct gl_context * ctx,
       } else {
 	 texImage->Data = intel_miptree_image_map(intel,
 						  intelImage->mt,
-						  intelImage->face,
-						  intelImage->level,
+						  intelImage->base.Face,
+						  intelImage->base.Level,
 						  &dstRowStride,
 						  texImage->ImageOffsets);
       }
diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c
index 7135a6276fe..31ac689ad77 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c
@@ -42,8 +42,8 @@ copy_image_data_to_tree(struct intel_context *intel,
        */
       intel_miptree_image_copy(intel,
                                intelObj->mt,
-                               intelImage->face,
-                               intelImage->level, intelImage->mt);
+                               intelImage->base.Face,
+                               intelImage->base.Level, intelImage->mt);
 
       intel_miptree_release(intel, &intelImage->mt);
    }
@@ -54,8 +54,8 @@ copy_image_data_to_tree(struct intel_context *intel,
        */
       intel_miptree_image_data(intel,
                                intelObj->mt,
-                               intelImage->face,
-                               intelImage->level,
+                               intelImage->base.Face,
+                               intelImage->base.Level,
                                intelImage->base.Data,
                                intelImage->base.RowStride,
                                intelImage->base.RowStride *
@@ -177,8 +177,8 @@ intel_tex_map_level_images(struct intel_context *intel,
 	 intelImage->base.Data =
 	    intel_miptree_image_map(intel,
 				    intelImage->mt,
-				    intelImage->face,
-				    intelImage->level,
+				    intelImage->base.Face,
+				    intelImage->base.Level,
 				    &intelImage->base.RowStride,
 				    intelImage->base.ImageOffsets);
 	 /* convert stride to texels, not bytes */

From 6dbad425bc423eb7db7c99aab161955c7b4cdc4c Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 14 Jul 2011 20:57:34 -0600
Subject: [PATCH 547/600] st/mesa: remove st_texture_image::face,level fields

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/state_tracker/st_cb_texture.c | 30 +++++++++++++-------------
 src/mesa/state_tracker/st_gen_mipmap.c |  1 -
 src/mesa/state_tracker/st_texture.c    |  4 ++--
 src/mesa/state_tracker/st_texture.h    |  5 -----
 4 files changed, 17 insertions(+), 23 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 25f08aa4d09..a3b2ba9e78d 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -344,7 +344,7 @@ guess_and_alloc_texture(struct st_context *st,
                               stImage->base.Width2,
                               stImage->base.Height2,
                               stImage->base.Depth2,
-                              stImage->level,
+                              stImage->base.Level,
                               &width, &height, &depth)) {
       /* we can't determine the image size at level=0 */
       stObj->width0 = stObj->height0 = stObj->depth0 = 0;
@@ -367,7 +367,7 @@ guess_and_alloc_texture(struct st_context *st,
         stImage->base._BaseFormat == GL_DEPTH_COMPONENT ||
         stImage->base._BaseFormat == GL_DEPTH_STENCIL_EXT) &&
        !stObj->base.GenerateMipmap &&
-       stImage->level == 0) {
+       stImage->base.Level == 0) {
       /* only alloc space for a single mipmap level */
       lastLevel = 0;
    }
@@ -506,8 +506,8 @@ st_TexImage(struct gl_context * ctx,
       assert(texImage->Depth == depth);
    }
 
-   stImage->face = _mesa_tex_target_to_face(target);
-   stImage->level = level;
+   stImage->base.Face = _mesa_tex_target_to_face(target);
+   stImage->base.Level = level;
 
    _mesa_set_fetch_functions(texImage, dims);
 
@@ -529,7 +529,7 @@ st_TexImage(struct gl_context * ctx,
    if (stObj->pt) {
       if (level > (GLint) stObj->pt->last_level ||
           !st_texture_match_image(stObj->pt, &stImage->base,
-                                  stImage->face, stImage->level)) {
+                                  stImage->base.Face, stImage->base.Level)) {
          DBG("release it\n");
          pipe_resource_reference(&stObj->pt, NULL);
          assert(!stObj->pt);
@@ -563,7 +563,7 @@ st_TexImage(struct gl_context * ctx,
     */
    if (stObj->pt &&
        st_texture_match_image(stObj->pt, &stImage->base,
-                              stImage->face, stImage->level)) {
+                              stImage->base.Face, stImage->base.Level)) {
 
       pipe_resource_reference(&stImage->pt, stObj->pt);
       assert(stImage->pt);
@@ -1501,8 +1501,8 @@ st_copy_texsubimage(struct gl_context *ctx,
          pipe->resource_copy_region(pipe,
                                     /* dest */
                                     stImage->pt,
-                                    stImage->level,
-                                    destX, destY, destZ + stImage->face,
+                                    stImage->base.Level,
+                                    destX, destY, destZ + stImage->base.Face,
                                     /* src */
                                     strb->texture,
                                     strb->surface->u.tex.level,
@@ -1524,9 +1524,9 @@ st_copy_texsubimage(struct gl_context *ctx,
          memset(&surf_tmpl, 0, sizeof(surf_tmpl));
          surf_tmpl.format = util_format_linear(stImage->pt->format);
          surf_tmpl.usage = PIPE_BIND_RENDER_TARGET;
-         surf_tmpl.u.tex.level = stImage->level;
-         surf_tmpl.u.tex.first_layer = stImage->face + destZ;
-         surf_tmpl.u.tex.last_layer = stImage->face + destZ;
+         surf_tmpl.u.tex.level = stImage->base.Level;
+         surf_tmpl.u.tex.first_layer = stImage->base.Face + destZ;
+         surf_tmpl.u.tex.last_layer = stImage->base.Face + destZ;
 
          dest_surface = pipe->create_surface(pipe, stImage->pt,
                                              &surf_tmpl);
@@ -1629,7 +1629,7 @@ copy_image_data_to_texture(struct st_context *st,
    /* debug checks */
    {
       const struct gl_texture_image *dstImage =
-         stObj->base.Image[stImage->face][dstLevel];
+         stObj->base.Image[stImage->base.Face][dstLevel];
       assert(dstImage);
       assert(dstImage->Width == stImage->base.Width);
       assert(dstImage->Height == stImage->base.Height);
@@ -1641,15 +1641,15 @@ copy_image_data_to_texture(struct st_context *st,
        */
       st_texture_image_copy(st->pipe,
                             stObj->pt, dstLevel,  /* dest texture, level */
-                            stImage->pt, stImage->level, /* src texture, level */
-                            stImage->face);
+                            stImage->pt, stImage->base.Level, /* src texture, level */
+                            stImage->base.Face);
 
       pipe_resource_reference(&stImage->pt, NULL);
    }
    else if (stImage->base.Data) {
       st_texture_image_data(st,
                             stObj->pt,
-                            stImage->face,
+                            stImage->base.Face,
                             dstLevel,
                             stImage->base.Data,
                             stImage->base.RowStride * 
diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c
index b0911294a7c..82ca4af7fe4 100644
--- a/src/mesa/state_tracker/st_gen_mipmap.c
+++ b/src/mesa/state_tracker/st_gen_mipmap.c
@@ -453,7 +453,6 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target,
                                  srcImage->TexFormat);
 
       stImage = st_texture_image(dstImage);
-      stImage->level = dstLevel;
 
       pipe_resource_reference(&stImage->pt, pt);
    }
diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c
index 0e857fddcdd..232c286c1d1 100644
--- a/src/mesa/state_tracker/st_texture.c
+++ b/src/mesa/state_tracker/st_texture.c
@@ -221,8 +221,8 @@ st_texture_image_map(struct st_context *st, struct st_texture_image *stImage,
 
    DBG("%s \n", __FUNCTION__);
 
-   stImage->transfer = pipe_get_transfer(st->pipe, pt, stImage->level,
-                                         stImage->face + zoffset,
+   stImage->transfer = pipe_get_transfer(st->pipe, pt, stImage->base.Level,
+                                         stImage->base.Face + zoffset,
                                          usage, x, y, w, h);
 
    if (stImage->transfer)
diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h
index b822f47cf9e..50b7284e760 100644
--- a/src/mesa/state_tracker/st_texture.h
+++ b/src/mesa/state_tracker/st_texture.h
@@ -45,11 +45,6 @@ struct st_texture_image
 {
    struct gl_texture_image base;
 
-   /* These aren't stored in gl_texture_image 
-    */
-   GLuint level;
-   GLuint face;
-
    /* If stImage->pt != NULL, image data is stored here.
     * Else if stImage->base.Data != NULL, image is stored there.
     * Else there is no image data.

From 4990e7e9fb6d64f5e4c23915767e5cea32c02965 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 3 Aug 2011 18:29:36 -0700
Subject: [PATCH 548/600] radeon: Fix flushing before writing a teximage's BO
 when !t->bo.

Before, if we ended up here without a BO for our image, but did choose
a miptree that had active rendering in the command buffer, our
teximage data would jump ahead of the rendering using the old texture
contents.

This showed up as breakage in gen-teximage and friends in the
following commit.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/drivers/dri/radeon/radeon_texture.c | 24 ++++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c
index ce0df32bfe4..ad7e4c146a4 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.c
@@ -787,18 +787,6 @@ static void radeon_teximage(
 	radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
 			"%s %dd: texObj %p, texImage %p, face %d, level %d\n",
 			__func__, dims, texObj, texImage, face, level);
-	{
-		struct radeon_bo *bo;
-		bo = !image->mt ? image->bo : image->mt->bo;
-		if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) {
-			radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
-				"%s Calling teximage for texture that is "
-				"queued for GPU processing.\n",
-				__func__);
-			radeon_firevertices(rmesa);
-		}
-	}
-
 
 	t->validated = GL_FALSE;
 
@@ -820,6 +808,18 @@ static void radeon_teximage(
 		}
 	}
 
+	{
+		struct radeon_bo *bo;
+		bo = !image->mt ? image->bo : image->mt->bo;
+		if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) {
+			radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
+				"%s Calling teximage for texture that is "
+				"queued for GPU processing.\n",
+				__func__);
+			radeon_firevertices(rmesa);
+		}
+	}
+
 	/* Upload texture image; note that the spec allows pixels to be NULL */
 	if (compressed) {
 		pixels = _mesa_validate_pbo_compressed_teximage(

From ddd6e5b8bce96cfcb72c7e7412296c9f39bdd5d7 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 29 Jul 2011 15:19:43 -0700
Subject: [PATCH 549/600] swrast: Remove swrast eject/validate texture image
 code.

No driver used the eject function, or set the validate hook that made
that function do anything.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/swrast/s_context.c | 79 -------------------------------------
 1 file changed, 79 deletions(-)

diff --git a/src/mesa/swrast/s_context.c b/src/mesa/swrast/s_context.c
index 4434f11b990..792b528ee34 100644
--- a/src/mesa/swrast/s_context.c
+++ b/src/mesa/swrast/s_context.c
@@ -417,84 +417,6 @@ _swrast_validate_blend_func(struct gl_context *ctx, GLuint n, const GLubyte mask
    swrast->BlendFunc( ctx, n, mask, src, dst, chanType );
 }
 
-
-/**
- * Make sure we have texture image data for all the textures we may need
- * for subsequent rendering.
- */
-static void
-_swrast_validate_texture_images(struct gl_context *ctx)
-{
-   SWcontext *swrast = SWRAST_CONTEXT(ctx);
-   GLuint u;
-
-   if (!swrast->ValidateTextureImage || !ctx->Texture._EnabledUnits) {
-      /* no textures enabled, or no way to validate images! */
-      return;
-   }
-
-   for (u = 0; u < ctx->Const.MaxTextureImageUnits; u++) {
-      if (ctx->Texture.Unit[u]._ReallyEnabled) {
-         struct gl_texture_object *texObj = ctx->Texture.Unit[u]._Current;
-         ASSERT(texObj);
-         if (texObj) {
-            GLuint numFaces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
-            GLuint face;
-            for (face = 0; face < numFaces; face++) {
-               GLint lvl;
-               for (lvl = texObj->BaseLevel; lvl <= texObj->_MaxLevel; lvl++) {
-                  struct gl_texture_image *texImg = texObj->Image[face][lvl];
-                  if (texImg && !texImg->Data) {
-                     swrast->ValidateTextureImage(ctx, texObj, face, lvl);
-                     ASSERT(texObj->Image[face][lvl]->Data);
-                  }
-               }
-            }
-         }
-      }
-   }
-}
-
-
-/**
- * Free the texture image data attached to all currently enabled
- * textures.  Meant to be called by device drivers when transitioning
- * from software to hardware rendering.
- */
-void
-_swrast_eject_texture_images(struct gl_context *ctx)
-{
-   GLuint u;
-
-   if (!ctx->Texture._EnabledUnits) {
-      /* no textures enabled */
-      return;
-   }
-
-   for (u = 0; u < ctx->Const.MaxTextureImageUnits; u++) {
-      if (ctx->Texture.Unit[u]._ReallyEnabled) {
-         struct gl_texture_object *texObj = ctx->Texture.Unit[u]._Current;
-         ASSERT(texObj);
-         if (texObj) {
-            GLuint numFaces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
-            GLuint face;
-            for (face = 0; face < numFaces; face++) {
-               GLint lvl;
-               for (lvl = texObj->BaseLevel; lvl <= texObj->_MaxLevel; lvl++) {
-                  struct gl_texture_image *texImg = texObj->Image[face][lvl];
-                  if (texImg && texImg->Data) {
-                     _mesa_free_texmemory(texImg->Data);
-                     texImg->Data = NULL;
-                  }
-               }
-            }
-         }
-      }
-   }
-}
-
-
-
 static void
 _swrast_sleep( struct gl_context *ctx, GLbitfield new_state )
 {
@@ -640,7 +562,6 @@ _swrast_validate_derived( struct gl_context *ctx )
 
       if (swrast->NewState & (_NEW_TEXTURE | _NEW_PROGRAM)) {
          _swrast_update_texture_samplers( ctx );
-         _swrast_validate_texture_images(ctx);
       }
 
       if (swrast->NewState & (_NEW_COLOR | _NEW_PROGRAM))

From 43968261e41aa7be915711451927a4e28c973690 Mon Sep 17 00:00:00 2001
From: Paul Berry <stereotype441@gmail.com>
Date: Tue, 16 Aug 2011 14:09:32 -0700
Subject: [PATCH 550/600] docs: Document coding style conventions

This patch documents some Mesa coding style conventions that came up
during the discussion of commit 67b5a32 (Perform implicit type
conversions on function call out parameters).
---
 docs/devinfo.html | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/docs/devinfo.html b/docs/devinfo.html
index 8887dd02624..c0966480ab7 100644
--- a/docs/devinfo.html
+++ b/docs/devinfo.html
@@ -71,6 +71,13 @@ well documented.  Also, strive to write clean, easily understandable code.
 If you use tabs, set them to 8 columns
 </p>
 
+<p>
+Line width: the preferred width to fill comments and code in Mesa is 78
+columns.  Exceptions are sometimes made for clarity (e.g. tabular data is
+sometimes filled to a much larger width so that extraneous carriage returns
+don't obscure the table).
+</p>
+
 <p>
 Brace example:
 </p>
@@ -81,10 +88,26 @@ Brace example:
 	else {
 	   bar;
 	}
+
+	switch (condition) {
+	case 0:
+	   foo();
+	   break;
+
+	case 1: {
+	   ...
+	   break;
+	}
+
+	default:
+	   ...
+	   break;
+	}
 </pre>
 
 <p>
 Here's the GNU indent command which will best approximate my preferred style:
+(Note that it won't format switch statements in the preferred way)
 </p>
 <pre>
 	indent -br -i3 -npcs --no-tabs infile.c -o outfile.c

From 473cf0633959aa3fb965e27499d4f4ca60e6cafd Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Wed, 24 Aug 2011 14:08:03 -0600
Subject: [PATCH 551/600] llvmpipe: add more restrict keywords

Put restrict in the function definitions to silence MSVC warnings
about incompatible assignments in "func = lp_tile_foobar;" when func
was declared with restrict keywords but the rhs function wasn't.

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
---
 src/gallium/drivers/llvmpipe/lp_tile_soa.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
index f4324e69971..a2795b604d2 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
@@ -75,7 +75,7 @@ def generate_format_read(format, dst_channel, dst_native_type, dst_suffix):
     src_native_type = native_type(format)
 
     print 'static void'
-    print 'lp_tile_%s_swizzle_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0)' % (name, dst_suffix, dst_native_type)
+    print 'lp_tile_%s_swizzle_%s(%s * restrict dst, const uint8_t * restrict src, unsigned src_stride, unsigned x0, unsigned y0)' % (name, dst_suffix, dst_native_type)
     print '{'
     print '   unsigned x, y;'
     print '   const uint8_t *src_row = src + y0*src_stride;'
@@ -273,7 +273,7 @@ def generate_format_write(format, src_channel, src_native_type, src_suffix):
     name = format.short_name()
 
     print 'static void'
-    print 'lp_tile_%s_unswizzle_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0)' % (name, src_suffix, src_native_type)
+    print 'lp_tile_%s_unswizzle_%s(const %s * restrict src, uint8_t * restrict dst, unsigned dst_stride, unsigned x0, unsigned y0)' % (name, src_suffix, src_native_type)
     print '{'
     if format.layout == PLAIN \
         and format.colorspace == 'rgb' \
@@ -501,7 +501,7 @@ def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix):
     print 'void'
     print 'lp_tile_swizzle_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y)' % (dst_suffix, dst_native_type)
     print '{'
-    print '   void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0);' % dst_native_type
+    print '   void (*func)(%s * restrict dst, const uint8_t * restrict src, unsigned src_stride, unsigned x0, unsigned y0);' % dst_native_type
     print '#ifdef DEBUG'
     print '   lp_tile_swizzle_count += 1;'
     print '#endif'
@@ -539,7 +539,7 @@ def generate_unswizzle(formats, src_channel, src_native_type, src_suffix):
     print 'lp_tile_unswizzle_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y)' % (src_suffix, src_native_type)
     
     print '{'
-    print '   void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0);' % src_native_type
+    print '   void (*func)(const %s * restrict src, uint8_t * restrict dst, unsigned dst_stride, unsigned x0, unsigned y0);' % src_native_type
     print '#ifdef DEBUG'
     print '   lp_tile_unswizzle_count += 1;'
     print '#endif'

From 038d654bcb4e9d88eab420cefc3169d4845db4c9 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Wed, 24 Aug 2011 14:50:28 -0600
Subject: [PATCH 552/600] vbo: remove unused var, remove unneeded local var

---
 src/mesa/vbo/vbo_exec_draw.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 25436c6d6d2..8ffaaaa4876 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -260,8 +260,6 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
 static void
 vbo_exec_vtx_unmap( struct vbo_exec_context *exec )
 {
-   GLenum target = GL_ARRAY_BUFFER_ARB;
-
    if (_mesa_is_bufferobj(exec->vtx.bufferobj)) {
       struct gl_context *ctx = exec->ctx;
       
@@ -295,7 +293,6 @@ void
 vbo_exec_vtx_map( struct vbo_exec_context *exec )
 {
    struct gl_context *ctx = exec->ctx;
-   const GLenum target = GL_ARRAY_BUFFER_ARB;
    const GLenum accessRange = GL_MAP_WRITE_BIT |  /* for MapBufferRange */
                               GL_MAP_INVALIDATE_RANGE_BIT |
                               GL_MAP_UNSYNCHRONIZED_BIT |
@@ -325,7 +322,7 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
       /* Need to allocate a new VBO */
       exec->vtx.buffer_used = 0;
 
-      ctx->Driver.BufferData(ctx, target, 
+      ctx->Driver.BufferData(ctx, GL_ARRAY_BUFFER_ARB,
                              VBO_VERT_BUFFER_SIZE, 
                              NULL, usage, exec->vtx.bufferobj);
 

From 707d614d100ae7fc72572b686e304dac47e11c87 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Wed, 24 Aug 2011 13:01:18 -0700
Subject: [PATCH 553/600] glsl: Make sure that Extensions.dummy_true is set to
 true

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Paul Berry <stereotype441@gmail.com>
---
 src/glsl/standalone_scaffolding.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp
index 696ea757e96..bbd7bb91310 100644
--- a/src/glsl/standalone_scaffolding.cpp
+++ b/src/glsl/standalone_scaffolding.cpp
@@ -63,6 +63,8 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api)
 
    ctx->API = api;
 
+   ctx->Extensions.dummy_false = false;
+   ctx->Extensions.dummy_true = true;
    ctx->Extensions.ARB_ES2_compatibility = true;
    ctx->Extensions.ARB_draw_buffers = true;
    ctx->Extensions.ARB_draw_instanced = true;

From 8dda3f2bcb5723518c284d502d224fe9c27122fc Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Fri, 19 Aug 2011 14:41:29 +0800
Subject: [PATCH 554/600] r600g: share the source list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Factor out C_SOURCES from Makefile to Makefile.sources, and let
Makefile and SConscript share it.

Reviewed-by: Marek Olšák <maraeo@gmail.com>
---
 src/gallium/drivers/r600/Makefile         | 17 ++---------------
 src/gallium/drivers/r600/Makefile.sources | 15 +++++++++++++++
 src/gallium/drivers/r600/SConscript       | 18 ++----------------
 3 files changed, 19 insertions(+), 31 deletions(-)
 create mode 100644 src/gallium/drivers/r600/Makefile.sources

diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile
index 7e21e3e32b1..0e68fe99345 100644
--- a/src/gallium/drivers/r600/Makefile
+++ b/src/gallium/drivers/r600/Makefile
@@ -6,20 +6,7 @@ LIBNAME = r600
 LIBRARY_INCLUDES = \
 	$(shell pkg-config libdrm --cflags-only-I)
 
-C_SOURCES = \
-	r600_asm.c \
-	r600_blit.c \
-	r600_buffer.c \
-	r600_pipe.c \
-	r600_query.c \
-	r600_resource.c \
-	r600_shader.c \
-	r600_state.c \
-	r600_texture.c \
-	r700_asm.c \
-	evergreen_state.c \
-	eg_asm.c \
-	r600_translate.c \
-	r600_state_common.c
+# get C_SOURCES
+include Makefile.sources
 
 include ../../Makefile.template
diff --git a/src/gallium/drivers/r600/Makefile.sources b/src/gallium/drivers/r600/Makefile.sources
new file mode 100644
index 00000000000..0366394e5a2
--- /dev/null
+++ b/src/gallium/drivers/r600/Makefile.sources
@@ -0,0 +1,15 @@
+C_SOURCES := \
+	r600_asm.c \
+	r600_blit.c \
+	r600_buffer.c \
+	r600_pipe.c \
+	r600_query.c \
+	r600_resource.c \
+	r600_shader.c \
+	r600_state.c \
+	r600_texture.c \
+	r700_asm.c \
+	evergreen_state.c \
+	eg_asm.c \
+	r600_translate.c \
+	r600_state_common.c
diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript
index 19f07b2bef8..be12255e4d0 100644
--- a/src/gallium/drivers/r600/SConscript
+++ b/src/gallium/drivers/r600/SConscript
@@ -11,22 +11,8 @@ env.Append(CPPPATH = [
 
 r600 = env.ConvenienceLibrary(
     target = 'r600',
-    source = [
-        'r600_asm.c',
-        'r600_buffer.c',
-        'r600_blit.c',
-        'r600_pipe.c',
-        'r600_query.c',
-        'r600_resource.c',
-        'r600_shader.c',
-        'r600_state.c',
-        'r600_state_common.c',
-        'r600_texture.c',
-        'r600_translate.c',
-        'r700_asm.c',
-        'evergreen_state.c',
-        'eg_asm.c',
-    ])
+    source = env.ParseSourceList('Makefile.sources', 'C_SOURCES')
+    )
 
 env.Alias('r600', r600)
 

From 027a45e5cf7092a93922fea980264dc0c31f7df7 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Fri, 19 Aug 2011 14:58:57 +0800
Subject: [PATCH 555/600] winsys/r600: share the source list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Factor out C_SOURCES from Makefile to Makefile.sources, and let
Makefile and SConscript share it.

Reviewed-by: Marek Olšák <maraeo@gmail.com>
---
 src/gallium/winsys/r600/drm/Makefile         | 8 ++------
 src/gallium/winsys/r600/drm/Makefile.sources | 7 +++++++
 src/gallium/winsys/r600/drm/SConscript       | 8 +-------
 3 files changed, 10 insertions(+), 13 deletions(-)
 create mode 100644 src/gallium/winsys/r600/drm/Makefile.sources

diff --git a/src/gallium/winsys/r600/drm/Makefile b/src/gallium/winsys/r600/drm/Makefile
index 5ad183d78ae..c23286c8197 100644
--- a/src/gallium/winsys/r600/drm/Makefile
+++ b/src/gallium/winsys/r600/drm/Makefile
@@ -4,12 +4,8 @@ include $(TOP)/configs/current
 
 LIBNAME = r600winsys
 
-C_SOURCES = \
-	evergreen_hw_context.c \
-	radeon_pciid.c \
-	r600_bo.c \
-	r600_drm.c \
-	r600_hw_context.c
+# get C_SOURCES
+include Makefile.sources
 
 LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/r600 \
 		   -I$(TOP)/include \
diff --git a/src/gallium/winsys/r600/drm/Makefile.sources b/src/gallium/winsys/r600/drm/Makefile.sources
new file mode 100644
index 00000000000..97980170e6b
--- /dev/null
+++ b/src/gallium/winsys/r600/drm/Makefile.sources
@@ -0,0 +1,7 @@
+C_SOURCES := \
+	evergreen_hw_context.c \
+	radeon_pciid.c \
+	r600_bo.c \
+	r600_drm.c \
+	r600_hw_context.c
+
diff --git a/src/gallium/winsys/r600/drm/SConscript b/src/gallium/winsys/r600/drm/SConscript
index ca51b52ea72..2d0d80e8bb9 100644
--- a/src/gallium/winsys/r600/drm/SConscript
+++ b/src/gallium/winsys/r600/drm/SConscript
@@ -2,13 +2,7 @@ Import('*')
 
 env = env.Clone()
 
-r600_sources = [
-    'evergreen_hw_context.c',
-    'radeon_pciid.c',
-    'r600_bo.c',
-    'r600_drm.c',
-    'r600_hw_context.c',
-]
+r600_sources = env.ParseSourceList('Makefile.sources', 'C_SOURCES')
 
 env.PkgUseModules('DRM_RADEON')
 

From 689b45fb27c36db49d68eb99a015f68a651d75ef Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Fri, 19 Aug 2011 15:00:20 +0800
Subject: [PATCH 556/600] winsys/radeon: share the source list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Factor out C_SOURCES from Makefile to Makefile.sources, and let
Makefile and SConscript share it.

Reviewed-by: Marek Olšák <maraeo@gmail.com>
---
 src/gallium/winsys/radeon/drm/Makefile         | 6 ++----
 src/gallium/winsys/radeon/drm/Makefile.sources | 4 ++++
 src/gallium/winsys/radeon/drm/SConscript       | 6 +-----
 3 files changed, 7 insertions(+), 9 deletions(-)
 create mode 100644 src/gallium/winsys/radeon/drm/Makefile.sources

diff --git a/src/gallium/winsys/radeon/drm/Makefile b/src/gallium/winsys/radeon/drm/Makefile
index 913e6ad186a..68b9efebfa4 100644
--- a/src/gallium/winsys/radeon/drm/Makefile
+++ b/src/gallium/winsys/radeon/drm/Makefile
@@ -4,10 +4,8 @@ include $(TOP)/configs/current
 
 LIBNAME = radeonwinsys
 
-C_SOURCES = \
-	radeon_drm_bo.c \
-	radeon_drm_cs.c \
-	radeon_drm_winsys.c
+# get C_SOURCES
+include Makefile.sources
 
 LIBRARY_INCLUDES = -I$(TOP)/include \
 		   $(shell pkg-config libdrm --cflags-only-I)
diff --git a/src/gallium/winsys/radeon/drm/Makefile.sources b/src/gallium/winsys/radeon/drm/Makefile.sources
new file mode 100644
index 00000000000..1d18d6164d5
--- /dev/null
+++ b/src/gallium/winsys/radeon/drm/Makefile.sources
@@ -0,0 +1,4 @@
+C_SOURCES := \
+	radeon_drm_bo.c \
+	radeon_drm_cs.c \
+	radeon_drm_winsys.c
diff --git a/src/gallium/winsys/radeon/drm/SConscript b/src/gallium/winsys/radeon/drm/SConscript
index 2edb1e94645..e5048d6255d 100644
--- a/src/gallium/winsys/radeon/drm/SConscript
+++ b/src/gallium/winsys/radeon/drm/SConscript
@@ -2,11 +2,7 @@ Import('*')
 
 env = env.Clone()
 
-radeon_sources = [
-    'radeon_drm_bo.c',
-    'radeon_drm_cs.c',
-    'radeon_drm_winsys.c',
-]
+radeon_sources = env.ParseSourceList('Makefile.sources', 'C_SOURCES')
 
 env.PkgUseModules('DRM')
 

From 7b1972d7be8ed68676786288ac2d77b8d71d113d Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Fri, 19 Aug 2011 14:35:45 +0800
Subject: [PATCH 557/600] android: add support for r600g
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tested with a Radeon HD 6250.  SurfaceFlinger (the display server and
compositor) works.  2D apps with RGB or RGBA visuals work.  As for 3D
apps, some work but some don't (with serious rendering defects).

Reviewed-by: Marek Olšák <maraeo@gmail.com>
---
 Android.mk                                | 10 ++++--
 src/gallium/Android.mk                    |  6 ++++
 src/gallium/drivers/r600/Android.mk       | 42 ++++++++++++++++++++++
 src/gallium/targets/egl-static/Android.mk |  4 +++
 src/gallium/winsys/r600/drm/Android.mk    | 43 +++++++++++++++++++++++
 src/gallium/winsys/radeon/drm/Android.mk  | 40 +++++++++++++++++++++
 6 files changed, 143 insertions(+), 2 deletions(-)
 create mode 100644 src/gallium/drivers/r600/Android.mk
 create mode 100644 src/gallium/winsys/r600/drm/Android.mk
 create mode 100644 src/gallium/winsys/radeon/drm/Android.mk

diff --git a/Android.mk b/Android.mk
index 03acbaf2f1c..53c619ee50c 100644
--- a/Android.mk
+++ b/Android.mk
@@ -24,7 +24,7 @@
 # BOARD_GPU_DRIVERS should be defined.  The valid values are
 #
 #   classic drivers:
-#   gallium drivers: swrast
+#   gallium drivers: swrast r600g
 #
 # The main target is libGLES_mesa.  There is no classic drivers yet.
 
@@ -36,7 +36,7 @@ DRM_TOP := external/drm
 DRM_GRALLOC_TOP := hardware/drm_gralloc
 
 classic_drivers :=
-gallium_drivers := swrast
+gallium_drivers := swrast r600g
 
 MESA_GPU_DRIVERS := $(BOARD_GPU_DRIVERS)
 
@@ -110,6 +110,12 @@ gallium_DRIVERS :=
 # swrast
 gallium_DRIVERS += libmesa_pipe_softpipe libmesa_winsys_sw_android
 
+# r600g
+ifneq ($(filter r600g, $(MESA_GPU_DRIVERS)),)
+gallium_DRIVERS += libmesa_winsys_radeon
+gallium_DRIVERS += libmesa_pipe_r600 libmesa_winsys_r600
+endif
+
 #
 # Notes about the order here:
 #
diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk
index bcbabba9db9..b49a61b1ffd 100644
--- a/src/gallium/Android.mk
+++ b/src/gallium/Android.mk
@@ -34,5 +34,11 @@ SUBDIRS := \
 # swrast
 SUBDIRS += winsys/sw/android drivers/softpipe
 
+# r600g
+ifneq ($(filter r600g, $(MESA_GPU_DRIVERS)),)
+SUBDIRS += winsys/radeon/drm
+SUBDIRS += winsys/r600/drm drivers/r600
+endif
+
 mkfiles := $(patsubst %,$(GALLIUM_TOP)/%/Android.mk,$(SUBDIRS))
 include $(mkfiles)
diff --git a/src/gallium/drivers/r600/Android.mk b/src/gallium/drivers/r600/Android.mk
new file mode 100644
index 00000000000..994ae07789c
--- /dev/null
+++ b/src/gallium/drivers/r600/Android.mk
@@ -0,0 +1,42 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+# get C_SOURCES
+include $(LOCAL_PATH)/Makefile.sources
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(C_SOURCES)
+
+LOCAL_CFLAGS := -std=c99
+
+LOCAL_C_INCLUDES := \
+	$(DRM_TOP) \
+	$(DRM_TOP)/include/drm
+
+LOCAL_MODULE := libmesa_pipe_r600
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/gallium/targets/egl-static/Android.mk b/src/gallium/targets/egl-static/Android.mk
index 8a65585b72b..ebc89ead454 100644
--- a/src/gallium/targets/egl-static/Android.mk
+++ b/src/gallium/targets/egl-static/Android.mk
@@ -46,6 +46,10 @@ LOCAL_C_INCLUDES := \
 # swrast
 LOCAL_CFLAGS += -DGALLIUM_SOFTPIPE
 
+ifneq ($(filter r600g, $(MESA_GPU_DRIVERS)),)
+LOCAL_CFLAGS += -D_EGL_PIPE_R600=1
+endif
+
 LOCAL_MODULE := libmesa_egl_gallium
 
 include $(GALLIUM_COMMON_MK)
diff --git a/src/gallium/winsys/r600/drm/Android.mk b/src/gallium/winsys/r600/drm/Android.mk
new file mode 100644
index 00000000000..eb79caa19ca
--- /dev/null
+++ b/src/gallium/winsys/r600/drm/Android.mk
@@ -0,0 +1,43 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+# get C_SOURCES
+include $(LOCAL_PATH)/Makefile.sources
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(C_SOURCES)
+
+LOCAL_CFLAGS := -std=c99
+
+LOCAL_C_INCLUDES := \
+	$(GALLIUM_TOP)/drivers/r600 \
+	$(DRM_TOP) \
+	$(DRM_TOP)/include/drm
+
+LOCAL_MODULE := libmesa_winsys_r600
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git a/src/gallium/winsys/radeon/drm/Android.mk b/src/gallium/winsys/radeon/drm/Android.mk
new file mode 100644
index 00000000000..c1922498225
--- /dev/null
+++ b/src/gallium/winsys/radeon/drm/Android.mk
@@ -0,0 +1,40 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+# get C_SOURCES
+include $(LOCAL_PATH)/Makefile.sources
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(C_SOURCES)
+
+LOCAL_C_INCLUDES := \
+	$(DRM_TOP) \
+	$(DRM_TOP)/include/drm
+
+LOCAL_MODULE := libmesa_winsys_radeon
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)

From 974412d7b985f44c2d3a68f818d2723346a4512b Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sat, 28 May 2011 11:56:43 +0200
Subject: [PATCH 558/600] d3d1x: fix xs_set_samplers

---
 src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h
index aedf82a4381..8e48d1883b1 100644
--- a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h
+++ b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h
@@ -391,11 +391,12 @@ struct GalliumD3D10Device : public GalliumD3D10ScreenImpl<threadsafe>
 			{
 				samplers[s][start + i] = samps[i];
 				sampler_csos[s].v[start + i] = samps[i] ? samps[i]->object : default_sampler;
+				last_different = i;
 			}
 			if(last_different >= 0)
 			{
 				num_samplers[s] = std::max(num_samplers[s], start + last_different + 1);
-				update_flags |= (UPDATE_SAMPLERS_SHIFT + s);
+				update_flags |= 1 << (UPDATE_SAMPLERS_SHIFT + s);
 			}
 		}
 	}

From 55592d9da1bb694c7275984cf9a3ecaafcccf46a Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 25 Aug 2011 12:52:35 +0200
Subject: [PATCH 559/600] d3d1x: save to correct slot in
 xs_set_constant_buffers

---
 src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h
index 8e48d1883b1..0a31cf10a34 100644
--- a/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h
+++ b/src/gallium/state_trackers/d3d1x/gd3d11/d3d11_context.h
@@ -352,9 +352,9 @@ struct GalliumD3D10Device : public GalliumD3D10ScreenImpl<threadsafe>
 	{
 		for(unsigned i = 0; i < count; ++i)
 		{
-			if(constbufs[i] != constant_buffers[s][i].p)
+			if(constbufs[i] != constant_buffers[s][start + i].p)
 			{
-				constant_buffers[s][i] = constbufs[i];
+				constant_buffers[s][start + i] = constbufs[i];
 				if(s < caps.stages && start + i < caps.constant_buffers[s])
 					pipe->set_constant_buffer(pipe, s, start + i, constbufs[i] ? constbufs[i]->resource : NULL);
 			}

From 79a486ead92e4493b2de1fedf0c8cb5de47003cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20Wasserb=C3=A4ch?= <kai@dev.carbon-project.org>
Date: Tue, 23 Aug 2011 10:48:57 +0200
Subject: [PATCH 560/600] Change return type of try_emit_* methods to bool.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ian Romanick explained (Message-Id: <4E528973.6080902@freedesktop.org>),
that the return type of non-API methods shouldn't use GLboolean but a
standard C++ bool.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Bryan Cain <bryancain3@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Kai Wasserbäch <kai@dev.carbon-project.org>
---
 src/mesa/program/ir_to_mesa.cpp            |  8 ++++----
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 10 +++++-----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index e7609df19ee..6820e4c6ba7 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -312,11 +312,11 @@ public:
    void emit_scs(ir_instruction *ir, enum prog_opcode op,
 		 dst_reg dst, const src_reg &src);
 
-   GLboolean try_emit_mad(ir_expression *ir,
+   bool try_emit_mad(ir_expression *ir,
 			  int mul_operand);
    bool try_emit_mad_for_and_not(ir_expression *ir,
 				 int mul_operand);
-   GLboolean try_emit_sat(ir_expression *ir);
+   bool try_emit_sat(ir_expression *ir);
 
    void emit_swz(ir_expression *ir);
 
@@ -871,7 +871,7 @@ ir_to_mesa_visitor::visit(ir_function *ir)
    }
 }
 
-GLboolean
+bool
 ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
 {
    int nonmul_operand = 1 - mul_operand;
@@ -934,7 +934,7 @@ ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
    return true;
 }
 
-GLboolean
+bool
 ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
 {
    /* Saturates were only introduced to vertex programs in
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 6f0d9fa3f8f..fff848cbdf6 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -390,11 +390,11 @@ public:
    void emit_scs(ir_instruction *ir, unsigned op,
         	 st_dst_reg dst, const st_src_reg &src);
 
-   GLboolean try_emit_mad(ir_expression *ir,
-        		  int mul_operand);
+   bool try_emit_mad(ir_expression *ir,
+              int mul_operand);
    bool try_emit_mad_for_and_not(ir_expression *ir,
               int mul_operand);
-   GLboolean try_emit_sat(ir_expression *ir);
+   bool try_emit_sat(ir_expression *ir);
 
    void emit_swz(ir_expression *ir);
 
@@ -1186,7 +1186,7 @@ glsl_to_tgsi_visitor::visit(ir_function *ir)
    }
 }
 
-GLboolean
+bool
 glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
 {
    int nonmul_operand = 1 - mul_operand;
@@ -1252,7 +1252,7 @@ glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operan
    return true;
 }
 
-GLboolean
+bool
 glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
 {
    /* Saturates were only introduced to vertex programs in

From dbec3a5daf6fd012adc4d9690ef1dccc65969e04 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kai=20Wasserb=C3=A4ch?= <kai@dev.carbon-project.org>
Date: Tue, 23 Aug 2011 10:48:58 +0200
Subject: [PATCH 561/600] Document the return type coding style.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As per discussion at [0] methods shouldn't use OpenGL return types, if
they're not part of the GL API.

[0] <http://marc.info/?l=mesa3d-dev&m=130754488901774&w=2>

Signed-off-by: Kai Wasserbäch <kai@dev.carbon-project.org>
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
---
 docs/devinfo.html | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/docs/devinfo.html b/docs/devinfo.html
index c0966480ab7..d9e82e29d0b 100644
--- a/docs/devinfo.html
+++ b/docs/devinfo.html
@@ -137,6 +137,16 @@ Function name examples:
 	_mesa_foo_bar()  - an internal non-static Mesa function
 </pre>
 
+<p>
+Places that are not directly visible to the GL API should prefer the use
+of <tt>bool</tt>, <tt>true</tt>, and
+<tt>false</tt> over <tt>GLboolean</tt>, <tt>GL_TRUE</tt>, and
+<tt>GL_FALSE</tt>.  In C code, this may mean that
+<tt>#include &lt;stdbool.h&gt;</tt> need to be added.  The
+<tt>try_emit_</tt>* methods in src/mesa/program/ir_to_mesa.cpp and
+src/mesa/state_tracker/st_glsl_to_tgsi.cpp can serve as an example.
+</p>
+
 
 <H2>Making a New Mesa Release</H2>
 

From b9eb4d8a59699e233255113acafae220c3d8fe3c Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 9 Aug 2011 10:53:29 -0700
Subject: [PATCH 562/600] glsl: Implement the GL_ARB_conservative_depth
 extension.

It's the same as GL_AMD_conservative_depth.  The specs have slight
differences in wording, but don't differ in content or behavior.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 docs/GL3.txt                    | 2 +-
 src/glsl/glsl_parser.yy         | 4 ++--
 src/glsl/glsl_parser_extras.cpp | 1 +
 src/glsl/glsl_parser_extras.h   | 2 ++
 src/glsl/ir.h                   | 2 +-
 src/glsl/linker.cpp             | 2 +-
 src/mesa/main/extensions.c      | 1 +
 7 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index c0cc4d172e0..ff1f5020a5b 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -123,7 +123,7 @@ GL_ARB_texture_storage                               not started
 GL_ARB_transform_feedback_instanced                  not started
 GL_ARB_base_instance                                 not started
 GL_ARB_shader_image_load_store                       not started
-GL_ARB_conservative_depth                            not started (may be close to AMD_conservative_depth though)
+GL_ARB_conservative_depth                            DONE (compiler)
 GL_ARB_shading_language_420pack                      not started
 GL_ARB_internalformat_query                          not started
 GL_ARB_map_buffer_alignment                          not started
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 1851f1e202e..25d02fb1eaf 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -1111,7 +1111,7 @@ layout_qualifier_id:
 	      }
 	   }
 
-	   /* Layout qualifiers for AMD_conservative_depth. */
+	   /* Layout qualifiers for AMD/ARB_conservative_depth. */
 	   if (!got_one && state->AMD_conservative_depth_enable) {
 	      if (strcmp($1, "depth_any") == 0) {
 	         got_one = true;
@@ -1129,7 +1129,7 @@ layout_qualifier_id:
 	
 	      if (got_one && state->AMD_conservative_depth_warn) {
 	         _mesa_glsl_warning(& @1, state,
-	                            "GL_AMD_conservative_depth "
+	                            "GL_ARB_conservative_depth "
 	                            "layout qualifier `%s' is used\n", $1);
 	      }
 	   }
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index cc781378d76..8f740e6a8e9 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -253,6 +253,7 @@ struct _mesa_glsl_extension {
 static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
    /*                                  target availability  API availability */
    /* name                             VS     GS     FS     GL     ES         supported flag */
+   EXT(ARB_conservative_depth,         true,  false, true,  true,  false,     AMD_conservative_depth),
    EXT(ARB_draw_buffers,               false, false, true,  true,  false,     dummy_true),
    EXT(ARB_draw_instanced,             true,  false, false, true,  false,     ARB_draw_instanced),
    EXT(ARB_explicit_attrib_location,   true,  false, true,  true,  false,     ARB_explicit_attrib_location),
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index fc392da5b21..dc6911d1c9a 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -180,6 +180,8 @@ struct _mesa_glsl_parse_state {
    bool ARB_shader_stencil_export_warn;
    bool AMD_conservative_depth_enable;
    bool AMD_conservative_depth_warn;
+   bool ARB_conservative_depth_enable;
+   bool ARB_conservative_depth_warn;
    bool AMD_shader_stencil_export_enable;
    bool AMD_shader_stencil_export_warn;
    bool OES_texture_3D_enable;
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 990aaa16af3..2e899f3ed6f 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -236,7 +236,7 @@ enum ir_variable_interpolation {
 /**
  * \brief Layout qualifiers for gl_FragDepth.
  *
- * The AMD_conservative_depth extension allows gl_FragDepth to be redeclared
+ * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared
  * with a layout qualifier.
  */
 enum ir_depth_layout {
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index b54ef41080a..ba81c59ff2c 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -395,7 +395,7 @@ cross_validate_globals(struct gl_shader_program *prog,
 
         /* Validate layout qualifiers for gl_FragDepth.
          *
-         * From the AMD_conservative_depth spec:
+         * From the AMD/ARB_conservative_depth specs:
          *    "If gl_FragDepth is redeclared in any fragment shader in
          *    a program, it must be redeclared in all fragment shaders in that
          *    program that have static assignments to gl_FragDepth. All
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index bc61c50a90f..14b0cf9acbd 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -81,6 +81,7 @@ static const struct extension extension_table[] = {
    { "GL_ARB_blend_func_extended",                 o(ARB_blend_func_extended),                 GL,             2009 },
    { "GL_ARB_color_buffer_float",                  o(ARB_color_buffer_float),                  GL,             2004 },
    { "GL_ARB_copy_buffer",                         o(ARB_copy_buffer),                         GL,             2008 },
+   { "GL_ARB_conservative_depth",                  o(AMD_conservative_depth),                  GL,             2011 },
    { "GL_ARB_depth_buffer_float",                  o(ARB_depth_buffer_float),                  GL,             2008 },
    { "GL_ARB_depth_clamp",                         o(ARB_depth_clamp),                         GL,             2003 },
    { "GL_ARB_depth_texture",                       o(ARB_depth_texture),                       GL,             2001 },

From 7f1b9ddd12e97ac57c4818646c17521bb0c2c358 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 25 Aug 2011 11:35:01 +0100
Subject: [PATCH 563/600] tgsi: add TXQ support. (v2)

this adds another callback in the sampler struct containing get_dims
entry point. This is used to query the driver for the texture resource
dimensions for the resource bound to the current sampler.

v2: remove unusued variable, fix indent

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 30 +++++++++++++++++++++++++-
 src/gallium/auxiliary/tgsi/tgsi_exec.h |  2 ++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 587483c049b..bfddf0db5f5 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1920,7 +1920,35 @@ exec_txd(struct tgsi_exec_machine *mach,
    }
 }
 
+static void
+exec_txq(struct tgsi_exec_machine *mach,
+         const struct tgsi_full_instruction *inst)
+{
+   struct tgsi_sampler *sampler;
+   const uint unit = inst->Src[1].Register.Index;
+   int result[4];
+   union tgsi_exec_channel r[4], src;
+   uint chan;
+   int i,j;
 
+   fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_INT);
+   sampler = mach->Samplers[unit];
+
+   sampler->get_dims(sampler, src.i[0], result);
+
+   for (i = 0; i < QUAD_SIZE; i++) {
+      for (j = 0; j < 4; j++) {
+	 r[j].i[i] = result[j];
+      }
+   }
+
+   for (chan = 0; chan < NUM_CHANNELS; chan++) {
+      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+	 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
+		    TGSI_EXEC_DATA_INT);
+      }
+   }
+}
 
 static void
 exec_sample(struct tgsi_exec_machine *mach,
@@ -3718,7 +3746,7 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_TXQ:
-      assert (0);
+      exec_txq(mach, inst);
       break;
 
    case TGSI_OPCODE_EMIT:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 6c32ccff323..2162a071a0b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -90,6 +90,8 @@ struct tgsi_sampler
                        const float c0[QUAD_SIZE],
                        enum tgsi_sampler_control control,
                        float rgba[NUM_CHANNELS][QUAD_SIZE]);
+   void (*get_dims)(struct tgsi_sampler *sampler, int level,
+		    int dims[4]);
 };
 
 #define TGSI_EXEC_NUM_TEMPS       128

From 461646f539aa306afa1df3f9d9c72da06818a3b6 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 25 Aug 2011 11:35:52 +0100
Subject: [PATCH 564/600] softpipe: add get_dims callback for TXQ support. (v2)

This adds the get_dims callback that is called from the tgsi exec_txq.

It returns values as per EXT_gpu_program4.

v2: fix one indent + use a switch (slighty modified from Brian)

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 40 ++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index f7309480bb9..2b03d844f53 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2566,6 +2566,45 @@ sp_sampler_variant_destroy( struct sp_sampler_variant *samp )
    FREE(samp);
 }
 
+static void
+sample_get_dims(struct tgsi_sampler *tgsi_sampler, int level,
+		int dims[4])
+{
+    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+    const struct pipe_sampler_view *view = samp->view;
+    const struct pipe_resource *texture = view->texture;
+
+    /* undefined according to EXT_gpu_program */
+    level += view->u.tex.first_level;
+    if (level > view->u.tex.last_level)
+	return;
+
+    dims[0] = u_minify(texture->width0, level);
+
+    switch(texture->target) {
+    case PIPE_TEXTURE_1D_ARRAY:
+       dims[1] = texture->array_size;
+       /* fallthrough */
+    case PIPE_TEXTURE_1D:
+    case PIPE_BUFFER:
+       return;
+    case PIPE_TEXTURE_2D_ARRAY:
+       dims[2] = texture->array_size;
+       /* fallthrough */
+    case PIPE_TEXTURE_2D:
+    case PIPE_TEXTURE_CUBE:
+    case PIPE_TEXTURE_RECT:
+       dims[1] = u_minify(texture->height0, level);
+       return;
+    case PIPE_TEXTURE_3D:
+       dims[1] = u_minify(texture->height0, level);
+       dims[2] = u_minify(texture->depth0, level);
+       return;
+    default:
+       assert(!"unexpected texture target in sample_get_dims()");
+       return;
+    }
+}
 
 /**
  * Create a sampler variant for a given set of non-orthogonal state.
@@ -2692,5 +2731,6 @@ sp_create_sampler_variant( const struct pipe_sampler_state *sampler,
       samp->base.get_samples = samp->sample_target;
    }
 
+   samp->base.get_dims = sample_get_dims;
    return samp;
 }

From 515d9e88801e2e1e2a7ac74ccd43f8fedfb80a96 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 24 Aug 2011 13:24:25 +0100
Subject: [PATCH 565/600] glsl_to_tgsi: implement TXS/TXQ. (v2)

GLSL uses TXS, call the gallium TXQ opcode.

v2: fix indent from 4->3.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Bryan Cain <bryancain3@gmail.com>
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 27 ++++++++++++++--------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index fff848cbdf6..85e4c662fea 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2426,16 +2426,18 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
    glsl_to_tgsi_instruction *inst = NULL;
    unsigned opcode = TGSI_OPCODE_NOP;
 
-   ir->coordinate->accept(this);
+   if (ir->coordinate) {
+      ir->coordinate->accept(this);
 
-   /* Put our coords in a temp.  We'll need to modify them for shadow,
-    * projection, or LOD, so the only case we'd use it as is is if
-    * we're doing plain old texturing.  The optimization passes on
-    * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
-    */
-   coord = get_temp(glsl_type::vec4_type);
-   coord_dst = st_dst_reg(coord);
-   emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
+      /* Put our coords in a temp.  We'll need to modify them for shadow,
+       * projection, or LOD, so the only case we'd use it as is is if
+       * we're doing plain old texturing.  The optimization passes on
+       * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
+       */
+      coord = get_temp(glsl_type::vec4_type);
+      coord_dst = st_dst_reg(coord);
+      emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
+   }
 
    if (ir->projector) {
       ir->projector->accept(this);
@@ -2470,6 +2472,10 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
       dy = this->result;
       break;
    case ir_txs:
+      opcode = TGSI_OPCODE_TXQ;
+      ir->lod_info.lod->accept(this);
+      lod_info = this->result;
+      break;
    case ir_txf: /* TODO: use TGSI_OPCODE_TXF here */
       assert(!"GLSL 1.30 features unsupported");
       break;
@@ -2544,6 +2550,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
 
    if (opcode == TGSI_OPCODE_TXD)
       inst = emit(ir, opcode, result_dst, coord, dx, dy);
+   else if (opcode == TGSI_OPCODE_TXQ)
+      inst = emit(ir, opcode, result_dst, lod_info);
    else
       inst = emit(ir, opcode, result_dst, coord);
 
@@ -4276,6 +4284,7 @@ compile_tgsi_instruction(struct st_translate *t,
    case TGSI_OPCODE_TXD:
    case TGSI_OPCODE_TXL:
    case TGSI_OPCODE_TXP:
+   case TGSI_OPCODE_TXQ:
       src[num_src++] = t->samplers[inst->sampler];
       ureg_tex_insn(ureg,
                     inst->op,

From d562f97bef99e051842ae0cec8f5ac46a10a73c4 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 25 Aug 2011 14:53:21 +0100
Subject: [PATCH 566/600] tgsi: add TXF support.

This is a straight texel fetch with no filtering or clamping. It uses
integers to specify the i/j/k (from EXT_gpu_shader4).

To enable this I had to add another hook into the tgsi sampler so that
we could easily bypass all the filtering sample does.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 57 +++++++++++++++++++++++++-
 src/gallium/auxiliary/tgsi/tgsi_exec.h |  3 ++
 2 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index bfddf0db5f5..38dc1efa551 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1594,6 +1594,9 @@ store_dest(struct tgsi_exec_machine *mach,
 #define FETCH(VAL,INDEX,CHAN)\
     fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)
 
+#define IFETCH(VAL,INDEX,CHAN)\
+    fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT)
+
 
 /**
  * Execute ARB-style KIL which is predicated by a src register.
@@ -1920,6 +1923,58 @@ exec_txd(struct tgsi_exec_machine *mach,
    }
 }
 
+
+static void
+exec_txf(struct tgsi_exec_machine *mach,
+	 const struct tgsi_full_instruction *inst)
+{
+   struct tgsi_sampler *sampler;
+   const uint unit = inst->Src[1].Register.Index;
+   union tgsi_exec_channel r[4];
+   uint chan;
+   float rgba[NUM_CHANNELS][QUAD_SIZE];
+   int j;
+
+   IFETCH(&r[3], 0, CHAN_W);
+
+   switch(inst->Texture.Texture) {
+   case TGSI_TEXTURE_3D:
+   case TGSI_TEXTURE_2D_ARRAY:
+      IFETCH(&r[2], 0, CHAN_Z);
+      /* fallthrough */
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_SHADOWRECT:
+   case TGSI_TEXTURE_1D_ARRAY:
+      IFETCH(&r[1], 0, CHAN_Y);
+      /* fallthrough */
+   case TGSI_TEXTURE_1D:
+   case TGSI_TEXTURE_SHADOW1D:
+      IFETCH(&r[0], 0, CHAN_X);
+      break;
+   default:
+      assert(0);
+      break;
+   }      
+
+   sampler = mach->Samplers[unit];
+   sampler->get_texel(sampler, r[0].i, r[1].i, r[2].i, r[3].i, rgba);
+
+   for (j = 0; j < QUAD_SIZE; j++) {
+      r[0].f[j] = rgba[0][j];
+      r[1].f[j] = rgba[1][j];
+      r[2].f[j] = rgba[2][j];
+      r[3].f[j] = rgba[3][j];
+   }
+
+   for (chan = 0; chan < NUM_CHANNELS; chan++) {
+      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+         store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+      }
+   }
+}
+
 static void
 exec_txq(struct tgsi_exec_machine *mach,
          const struct tgsi_full_instruction *inst)
@@ -3742,7 +3797,7 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_TXF:
-      assert (0);
+      exec_txf(mach, inst);
       break;
 
    case TGSI_OPCODE_TXQ:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 2162a071a0b..3f6964c17fb 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -92,6 +92,9 @@ struct tgsi_sampler
                        float rgba[NUM_CHANNELS][QUAD_SIZE]);
    void (*get_dims)(struct tgsi_sampler *sampler, int level,
 		    int dims[4]);
+   void (*get_texel)(struct tgsi_sampler *sampler, const int i[QUAD_SIZE],
+		     const int j[QUAD_SIZE], const int k[QUAD_SIZE],
+		     const int lod[QUAD_SIZE], float rgba[NUM_CHANNELS][QUAD_SIZE]);
 };
 
 #define TGSI_EXEC_NUM_TEMPS       128

From 62ad6e66a5c11fa58e51a6251f97a12a759773ec Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 25 Aug 2011 14:54:27 +0100
Subject: [PATCH 567/600] softpipe: implement TXF support via get_texel
 callback

This just calls the texel fetch functions directly bypassing the sampling,

notes:
1: loops inside switch should be more optimal.
2: borders can be sampled though only up to border depth, outside that
its undefined.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 69 ++++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 2b03d844f53..76ec2f45126 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2606,6 +2606,74 @@ sample_get_dims(struct tgsi_sampler *tgsi_sampler, int level,
     }
 }
 
+/* this function is only used for unfiltered texel gets
+   via the TGSI TXF opcode. */
+static void
+sample_get_texels(struct tgsi_sampler *tgsi_sampler,
+	   const int v_i[QUAD_SIZE],
+	   const int v_j[QUAD_SIZE],
+	   const int v_k[QUAD_SIZE],
+	   const int lod[QUAD_SIZE],
+	   float rgba[NUM_CHANNELS][QUAD_SIZE])
+{
+   const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
+   union tex_tile_address addr;
+   const struct pipe_resource *texture = samp->view->texture;
+   int j, c;
+   float *tx;
+
+   addr.value = 0;
+   /* TODO write a better test for LOD */
+   addr.bits.level = lod[0];
+
+   switch(texture->target) {
+   case PIPE_TEXTURE_1D:
+      for (j = 0; j < QUAD_SIZE; j++) {
+	 tx = get_texel_2d(samp, addr, v_i[j], 0);
+	 for (c = 0; c < 4; c++) {
+	    rgba[c][j] = tx[c];
+	 }
+      }
+      break;
+   case PIPE_TEXTURE_1D_ARRAY:
+      for (j = 0; j < QUAD_SIZE; j++) {
+	 tx = get_texel_1d_array(samp, addr, v_i[j], v_j[j]);
+	 for (c = 0; c < 4; c++) {
+	    rgba[c][j] = tx[c];
+	 }
+      }
+      break;
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
+      for (j = 0; j < QUAD_SIZE; j++) {
+	 tx = get_texel_2d(samp, addr, v_i[j], v_j[j]);
+	 for (c = 0; c < 4; c++) {
+	    rgba[c][j] = tx[c];
+	 }
+      }
+      break;
+   case PIPE_TEXTURE_2D_ARRAY:
+      for (j = 0; j < QUAD_SIZE; j++) {
+	 tx = get_texel_2d_array(samp, addr, v_i[j], v_j[j], v_k[j]);
+	 for (c = 0; c < 4; c++) {
+	    rgba[c][j] = tx[c];
+	 }
+      }
+      break;
+   case PIPE_TEXTURE_3D:
+      for (j = 0; j < QUAD_SIZE; j++) {
+	 tx = get_texel_3d(samp, addr, v_i[j], v_j[j], v_k[j]);
+	 for (c = 0; c < 4; c++) {
+	    rgba[c][j] = tx[c];
+	 }
+      }
+      break;
+   case PIPE_TEXTURE_CUBE: /* TXF can't work on CUBE according to spec */
+   default:
+      assert(!"Unknown or CUBE texture type in TXF processing\n");
+      break;
+   }
+}
 /**
  * Create a sampler variant for a given set of non-orthogonal state.
  */
@@ -2732,5 +2800,6 @@ sp_create_sampler_variant( const struct pipe_sampler_state *sampler,
    }
 
    samp->base.get_dims = sample_get_dims;
+   samp->base.get_texel = sample_get_texels;
    return samp;
 }

From 5f3de17ef0f8b6280a6bf331ea6686a260f0d0d4 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 25 Aug 2011 13:38:43 +0100
Subject: [PATCH 568/600] glsl_to_tgsi: add TXF support. (v2)

This adds texelFetch support to translate from GLSL to TGSI TXF opcode.

I've tested this works with an r600g and softpipe backend.

v2: drop comments, fix title,

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Bryan Cain <bryancain3@gmail.com>
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 85e4c662fea..9cac30995af 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2476,8 +2476,10 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
       ir->lod_info.lod->accept(this);
       lod_info = this->result;
       break;
-   case ir_txf: /* TODO: use TGSI_OPCODE_TXF here */
-      assert(!"GLSL 1.30 features unsupported");
+   case ir_txf:
+      opcode = TGSI_OPCODE_TXF;
+      ir->lod_info.lod->accept(this);
+      lod_info = this->result;
       break;
    }
 
@@ -2541,7 +2543,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
       coord_dst.writemask = WRITEMASK_XYZW;
    }
 
-   if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB) {
+   if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
+       opcode == TGSI_OPCODE_TXF) {
       /* TGSI stores LOD or LOD bias in the last channel of the coords. */
       coord_dst.writemask = WRITEMASK_W;
       emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
@@ -4285,6 +4288,7 @@ compile_tgsi_instruction(struct st_translate *t,
    case TGSI_OPCODE_TXL:
    case TGSI_OPCODE_TXP:
    case TGSI_OPCODE_TXQ:
+   case TGSI_OPCODE_TXF:
       src[num_src++] = t->samplers[inst->sampler];
       ureg_tex_insn(ureg,
                     inst->op,

From e3a7cb4a6c94efe250c0212f062930e2026a861d Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 25 Aug 2011 10:06:29 -0600
Subject: [PATCH 569/600] softpipe: add const qualifier to silence warnings

---
 src/gallium/drivers/softpipe/sp_tex_sample.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 76ec2f45126..89c6536b1f4 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2620,7 +2620,7 @@ sample_get_texels(struct tgsi_sampler *tgsi_sampler,
    union tex_tile_address addr;
    const struct pipe_resource *texture = samp->view->texture;
    int j, c;
-   float *tx;
+   const float *tx;
 
    addr.value = 0;
    /* TODO write a better test for LOD */

From 27395cb5b688d3d255d11c4d766a2699fd1c67d4 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 25 Aug 2011 10:31:17 -0600
Subject: [PATCH 570/600] pp: add files to Makefile.sources

---
 src/gallium/auxiliary/Makefile.sources | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
index 07cc3156f07..766beb0fafc 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -57,6 +57,12 @@ C_SOURCES := \
 	pipebuffer/pb_bufmgr_pool.c \
 	pipebuffer/pb_bufmgr_slab.c \
 	pipebuffer/pb_validate.c \
+	postprocess/pp_celshade.c \
+	postprocess/pp_colors.c \
+	postprocess/pp_init.c \
+	postprocess/pp_mlaa.c \
+	postprocess/pp_run.c \
+	postprocess/pp_program.c \
 	rbug/rbug_connection.c \
 	rbug/rbug_context.c \
 	rbug/rbug_core.c \

From c25b4943322ddd31d70d87464fe32be26ac6c858 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 25 Aug 2011 09:43:41 -0700
Subject: [PATCH 571/600] glsl: Bail after reporting an error for non-constant
 const_in parameters.

Otherwise we continue and hit the "Illegal formal parameter mode"
assertion.

Fixes negative compile test texelFetchOffset.frag in piglit.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/glsl/ast_function.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
index c49a33d0486..8b79d850581 100644
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -164,6 +164,7 @@ match_function_by_name(exec_list *instructions, const char *name,
 	    _mesa_glsl_error(loc, state,
 			     "parameter `%s' must be a constant expression",
 			     formal->name);
+	    return ir_call::get_error_instruction(ctx);
 	 }
 
 	 if ((formal->mode == ir_var_out)

From 8ce716257a3529d0aa750d7a7b344984c5ac4274 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 25 Aug 2011 21:05:13 +0100
Subject: [PATCH 572/600] glsl: fix crash when a const is passed to
 texelFetchOffset

while debugging texelFetchOffset we kept hitting the assert.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/ast_function.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
index 8b79d850581..ca45934a478 100644
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -195,6 +195,7 @@ match_function_by_name(exec_list *instructions, const char *name,
 
 	 if (formal->type->is_numeric() || formal->type->is_boolean()) {
             switch (formal->mode) {
+            case ir_var_const_in:
             case ir_var_in: {
                ir_rvalue *converted
                   = convert_component(actual, formal->type);

From 116680ddc28c2c3b04fd78acdaa3ef2108c43872 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 2 Aug 2011 19:29:52 -0700
Subject: [PATCH 573/600] i965: Remove all bits of NRM3 and NRM4 code

Nothing in Mesa generates these opcodes, and i965 hardware cannot
support it natively.  If support were ever added for this opcode in
Mesa, there had better be a lowering pass for hardware that doesn't
support it natively.
---
 src/mesa/drivers/dri/i965/brw_vs_emit.c | 31 -------------------------
 1 file changed, 31 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index a06a2bbec52..bfee811e13d 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1096,31 +1096,6 @@ static void emit_lrp_noalias(struct brw_vs_compile *c,
    brw_MAC(p, dst, arg0, arg1);
 }
 
-/** 3 or 4-component vector normalization */
-static void emit_nrm( struct brw_vs_compile *c, 
-                      struct brw_reg dst,
-                      struct brw_reg arg0,
-                      int num_comps)
-{
-   struct brw_compile *p = &c->func;
-   struct brw_reg tmp = get_tmp(c);
-
-   /* tmp = dot(arg0, arg0) */
-   if (num_comps == 3)
-      brw_DP3(p, tmp, arg0, arg0);
-   else
-      brw_DP4(p, tmp, arg0, arg0);
-
-   /* tmp = 1 / sqrt(tmp) */
-   emit_math1(c, BRW_MATH_FUNCTION_RSQ, tmp, tmp, BRW_MATH_PRECISION_FULL);
-
-   /* dst = arg0 * tmp */
-   brw_MUL(p, dst, arg0, tmp);
-
-   release_tmp(c, tmp);
-}
-
-
 static struct brw_reg
 get_constant(struct brw_vs_compile *c,
              const struct prog_instruction *inst,
@@ -2045,12 +2020,6 @@ void brw_old_vs_emit(struct brw_vs_compile *c )
       case OPCODE_DPH:
 	 brw_DPH(p, dst, args[0], args[1]);
 	 break;
-      case OPCODE_NRM3:
-	 emit_nrm(c, dst, args[0], 3);
-	 break;
-      case OPCODE_NRM4:
-	 emit_nrm(c, dst, args[0], 4);
-	 break;
       case OPCODE_DST:
 	 unalias2(c, dst, args[0], args[1], emit_dst_noalias); 
 	 break;

From 778ecc928388b2905d516743d0bdf19ffce03acb Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 25 Aug 2011 13:11:36 -0700
Subject: [PATCH 574/600] glcpp: Add GL_ARB_conservative_depth #define.

Forgotten in the patch that enabled the extension.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/glcpp/glcpp-parse.y | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y
index 0a35e88cec7..940830416c0 100644
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -1132,8 +1132,10 @@ glcpp_parser_create (const struct gl_extensions *extensions, int api)
 	   if (extensions->ARB_shader_texture_lod)
 	      add_builtin_define(parser, "GL_ARB_shader_texture_lod", 1);
 
-	   if (extensions->AMD_conservative_depth)
+	   if (extensions->AMD_conservative_depth) {
 	      add_builtin_define(parser, "GL_AMD_conservative_depth", 1);
+	      add_builtin_define(parser, "GL_ARB_conservative_depth", 1);
+	   }
 	}
 
 	language_version = 110;

From 6ba68c7654ee9c2e90b99b4ba653287684904c74 Mon Sep 17 00:00:00 2001
From: Vadim Girlin <vadimgirlin@gmail.com>
Date: Thu, 25 Aug 2011 00:32:54 +0400
Subject: [PATCH 575/600] r600g: fix check_and_set_bank_swizzle

Need to do full check when not all bank swizzles in the group are forced
(e.g. when trying to merge interp_* group with the next instruction)

Note: This is a candidate for the 7.11 branch.

Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/r600/r600_asm.c | 30 +++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 0311b562f27..604cb604339 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -696,15 +696,19 @@ static int check_and_set_bank_swizzle(struct r600_bytecode *bc,
 {
 	struct alu_bank_swizzle bs;
 	int bank_swizzle[5];
-	int i, r = 0, forced = 0;
+	int i, r = 0, forced = 1;
 	boolean scalar_only = bc->chip_class == CAYMAN ? false : true;
 	int max_slots = bc->chip_class == CAYMAN ? 4 : 5;
 
 	for (i = 0; i < max_slots; i++) {
-		if (slots[i] && slots[i]->bank_swizzle_force) {
-			slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
-			forced = 1;
+		if (slots[i]) {
+			if (slots[i]->bank_swizzle_force) {
+				slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
+			} else {
+				forced = 0;
+			}
 		}
+
 		if (i < 4 && slots[i])
 			scalar_only = false;
 	}
@@ -714,7 +718,11 @@ static int check_and_set_bank_swizzle(struct r600_bytecode *bc,
 	/* Just check every possible combination of bank swizzle.
 	 * Not very efficent, but works on the first try in most of the cases. */
 	for (i = 0; i < 4; i++)
-		bank_swizzle[i] = SQ_ALU_VEC_012;
+		if (!slots[i] || !slots[i]->bank_swizzle_force)
+			bank_swizzle[i] = SQ_ALU_VEC_012;
+		else
+			bank_swizzle[i] = slots[i]->bank_swizzle;
+
 	bank_swizzle[4] = SQ_ALU_SCL_210;
 	while(bank_swizzle[4] <= SQ_ALU_SCL_221) {
 
@@ -751,11 +759,13 @@ static int check_and_set_bank_swizzle(struct r600_bytecode *bc,
 			bank_swizzle[4]++;
 		} else {
 			for (i = 0; i < max_slots; i++) {
-				bank_swizzle[i]++;
-				if (bank_swizzle[i] <= SQ_ALU_VEC_210)
-					break;
-				else
-					bank_swizzle[i] = SQ_ALU_VEC_012;
+				if (!slots[i] || !slots[i]->bank_swizzle_force) {
+					bank_swizzle[i]++;
+					if (bank_swizzle[i] <= SQ_ALU_VEC_210)
+						break;
+					else
+						bank_swizzle[i] = SQ_ALU_VEC_012;
+				}
 			}
 		}
 	}

From fdb62ef3f5b0fadd3cbac610f5b612bcfad5af1a Mon Sep 17 00:00:00 2001
From: Vadim Girlin <vadimgirlin@gmail.com>
Date: Thu, 25 Aug 2011 00:32:55 +0400
Subject: [PATCH 576/600] r600g: fix replace_gpr_with_pv_ps

Instructions with 3 source operands have no write mask, so we may replace their
destinations with PV/PS in the next group even if their dst.write is 0.

Note: This is a candidate for the 7.11 branch.

Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/r600/r600_asm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 604cb604339..27febdf9d03 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -787,7 +787,7 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
 		return r;
 
 	for (i = 0; i < max_slots; ++i) {
-		if(prev[i] && prev[i]->dst.write && !prev[i]->dst.rel) {
+		if (prev[i] && (prev[i]->dst.write || prev[i]->is_op3) && !prev[i]->dst.rel) {
 			gpr[i] = prev[i]->dst.sel;
 			/* cube writes more than PV.X */
 			if (!is_alu_cube_inst(bc, prev[i]) && is_alu_reduction_inst(bc, prev[i]))

From b97889f543085f516fc1c821c621790399d57fa5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Marchesin?= <marcheu@chromium.org>
Date: Tue, 23 Aug 2011 18:44:36 -0700
Subject: [PATCH 577/600] i915g: Improve the flush heuristic by using the
 previous frame's number of vertices.

---
 src/gallium/drivers/i915/i915_batch.h   | 13 +++++++++----
 src/gallium/drivers/i915/i915_clear.c   |  3 +++
 src/gallium/drivers/i915/i915_context.h |  5 ++++-
 src/gallium/drivers/i915/i915_flush.c   |  3 ++-
 4 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/i915/i915_batch.h b/src/gallium/drivers/i915/i915_batch.h
index a1f8bcae802..56d331f3e7a 100644
--- a/src/gallium/drivers/i915/i915_batch.h
+++ b/src/gallium/drivers/i915/i915_batch.h
@@ -64,11 +64,16 @@ static INLINE void i915_flush_heuristically(struct i915_context* i915,
                                             int num_vertex)
 {
    struct i915_winsys *iws = i915->iws;
-   i915->vertices_since_last_flush += num_vertex;
-   if ( i915->vertices_since_last_flush > 4096
-      || ( i915->vertices_since_last_flush > 256 &&
-           !iws->buffer_is_busy(iws, i915->current.cbuf_bo)) )
+
+   i915->queued_vertices += num_vertex;
+
+   /* fire if we have more than 1/20th of the last frame's vertices */
+   if (i915->queued_vertices > i915->last_fired_vertices / 20) {
       FLUSH_BATCH(NULL);
+      i915->fired_vertices += i915->queued_vertices;
+      i915->queued_vertices = 0;
+      return;
+   }
 }
 
 
diff --git a/src/gallium/drivers/i915/i915_clear.c b/src/gallium/drivers/i915/i915_clear.c
index e1d6a749cdc..4f9aa2c3120 100644
--- a/src/gallium/drivers/i915/i915_clear.c
+++ b/src/gallium/drivers/i915/i915_clear.c
@@ -125,6 +125,9 @@ i915_clear_emit(struct pipe_context *pipe, unsigned buffers, const float *rgba,
     * This is not required, just a heuristic
     */
    FLUSH_BATCH(NULL);
+
+   i915->last_fired_vertices = i915->fired_vertices;
+   i915->fired_vertices = 0;
 }
 
 /**
diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
index 84862351ffe..fca8688a526 100644
--- a/src/gallium/drivers/i915/i915_context.h
+++ b/src/gallium/drivers/i915/i915_context.h
@@ -264,7 +264,10 @@ struct i915_context {
    struct util_slab_mempool transfer_pool;
    struct util_slab_mempool texture_transfer_pool;
 
-   int vertices_since_last_flush;
+   /* state for tracking flushes */
+   int last_fired_vertices;
+   int fired_vertices;
+   int queued_vertices;
 
    /** blitter/hw-clear */
    struct blitter_context* blitter;
diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c
index 6d76afa9dbc..5d8e3c8274f 100644
--- a/src/gallium/drivers/i915/i915_flush.c
+++ b/src/gallium/drivers/i915/i915_flush.c
@@ -77,5 +77,6 @@ void i915_flush(struct i915_context *i915, struct pipe_fence_handle **fence)
    i915->static_dirty = ~0;
    /* kernel emits flushes in between batchbuffers */
    i915->flush_dirty = 0;
-   i915->vertices_since_last_flush = 0;
+   i915->fired_vertices += i915->queued_vertices;
+   i915->queued_vertices = 0;
 }

From 3d9000393b0c2e53f35e5a093ea6781f849230d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Marchesin?= <marcheu@chromium.org>
Date: Tue, 23 Aug 2011 18:47:24 -0700
Subject: [PATCH 578/600] i915g: Fix case where texcoords can overlap with
 fragpos/frontface.

---
 src/gallium/drivers/i915/i915_fpc.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/i915/i915_fpc.h b/src/gallium/drivers/i915/i915_fpc.h
index b760bc461a1..b2683c82033 100644
--- a/src/gallium/drivers/i915/i915_fpc.h
+++ b/src/gallium/drivers/i915/i915_fpc.h
@@ -39,9 +39,9 @@
 
 #define I915_PROGRAM_SIZE 192
 
-/* Use those indices for pos/face routing, must be >= I915_TEX_UNITS */
-#define I915_SEMANTIC_POS  10
-#define I915_SEMANTIC_FACE 11
+/* Use those indices for pos/face routing, must be >= num of inputs */
+#define I915_SEMANTIC_POS  100
+#define I915_SEMANTIC_FACE 101
 
 
 /**

From a2c467c0548b658a92af11c063520bc4d027895e Mon Sep 17 00:00:00 2001
From: David Reveman <reveman@chromium.org>
Date: Tue, 23 Aug 2011 18:49:11 -0700
Subject: [PATCH 579/600] i915g: Fix off-by-one in scissors.

---
 src/gallium/drivers/i915/i915_state_dynamic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/i915/i915_state_dynamic.c b/src/gallium/drivers/i915/i915_state_dynamic.c
index 204cee6fe9e..1a21433eb9e 100644
--- a/src/gallium/drivers/i915/i915_state_dynamic.c
+++ b/src/gallium/drivers/i915/i915_state_dynamic.c
@@ -268,8 +268,8 @@ static void upload_SCISSOR_RECT(struct i915_context *i915)
 {
    unsigned x1 = i915->scissor.minx;
    unsigned y1 = i915->scissor.miny;
-   unsigned x2 = i915->scissor.maxx;
-   unsigned y2 = i915->scissor.maxy;
+   unsigned x2 = i915->scissor.maxx - 1;
+   unsigned y2 = i915->scissor.maxy - 1;
    unsigned sc[3];
 
    sc[0] = _3DSTATE_SCISSOR_RECT_0_CMD;

From 6fb12bf031fdceadebc8a3d7b7756bc822fbf6e4 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 25 Aug 2011 13:03:19 +0100
Subject: [PATCH 580/600] tgsi: update tgsi.rst for TXQ

add some info on the TXQ opcode.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/docs/source/tgsi.rst | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 4debcc6ecc4..039cb1c03d5 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -1031,9 +1031,21 @@ XXX so let's discuss it, yeah?
   TBD
 
 
-.. opcode:: TXQ - Texture Size Query
+.. opcode:: TXQ - Texture Size Query (as per NV_gpu_program4)
+                  retrieve the dimensions of the texture
+                  depending on the target. For 1D (width), 2D/RECT/CUBE
+		  (width, height), 3D (width, height, depth),
+		  1D array (width, layers), 2D array (width, height, layers)
 
-  TBD
+.. math::
+
+  lod = src0
+
+  dst.x = texture_width(unit, lod)
+
+  dst.y = texture_height(unit, lod)
+
+  dst.z = texture_depth(unit, lod)
 
 
 .. opcode:: CONT - Continue

From 1d1d038c85ebb37f1da4540f092563e8ecab7dfb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <deathsimple@vodafone.de>
Date: Tue, 9 Aug 2011 18:45:13 +0200
Subject: [PATCH 581/600] g3dvl: Rework the decoder interface part 1/5
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

First of all get ride of the decode_buffer structure, while still giving
the decoder the ability to organize it's buffers depending on the needs
of the state tracker.

Signed-off-by: Christian König <deathsimple@vodafone.de>
Reviewed-by: Younes Manton <younes.m@gmail.com>
---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c  | 465 ++++++++++--------
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h  |   9 +-
 src/gallium/include/pipe/p_video_decoder.h    |  70 +--
 src/gallium/state_trackers/vdpau/decode.c     |  53 +-
 .../state_trackers/vdpau/vdpau_private.h      |   2 +-
 .../state_trackers/xorg/xvmc/surface.c        | 115 +++--
 .../state_trackers/xorg/xvmc/xvmc_private.h   |   6 +-
 7 files changed, 404 insertions(+), 316 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 61d947ca4c8..228a386ce4a 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -76,20 +76,16 @@ static const unsigned num_mc_format_configs =
    sizeof(mc_format_config) / sizeof(struct format_config);
 
 static bool
-init_zscan_buffer(struct vl_mpeg12_buffer *buffer)
+init_zscan_buffer(struct vl_mpeg12_decoder *dec, struct vl_mpeg12_buffer *buffer)
 {
    enum pipe_format formats[3];
 
    struct pipe_sampler_view **source;
    struct pipe_surface **destination;
 
-   struct vl_mpeg12_decoder *dec;
-
    unsigned i;
 
-   assert(buffer);
-
-   dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;
+   assert(dec && buffer);
 
    formats[0] = formats[1] = formats[2] = dec->zscan_source_format;
    buffer->zscan_source = vl_video_buffer_create_ex
@@ -147,17 +143,13 @@ cleanup_zscan_buffer(struct vl_mpeg12_buffer *buffer)
 }
 
 static bool
-init_idct_buffer(struct vl_mpeg12_buffer *buffer)
+init_idct_buffer(struct vl_mpeg12_decoder *dec, struct vl_mpeg12_buffer *buffer)
 {
    struct pipe_sampler_view **idct_source_sv, **mc_source_sv;
 
-   struct vl_mpeg12_decoder *dec;
-
    unsigned i;
 
-   assert(buffer);
-
-   dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;
+   assert(dec && buffer);
 
    idct_source_sv = dec->idct_source->get_sampler_view_planes(dec->idct_source);
    if (!idct_source_sv)
@@ -187,27 +179,18 @@ error_source_sv:
 static void
 cleanup_idct_buffer(struct vl_mpeg12_buffer *buf)
 {
-   struct vl_mpeg12_decoder *dec;
    unsigned i;
    
    assert(buf);
 
-   dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
-   assert(dec);
-
    for (i = 0; i < 3; ++i)
       vl_idct_cleanup_buffer(&buf->idct[0]);
 }
 
 static bool
-init_mc_buffer(struct vl_mpeg12_buffer *buf)
+init_mc_buffer(struct vl_mpeg12_decoder *dec, struct vl_mpeg12_buffer *buf)
 {
-   struct vl_mpeg12_decoder *dec;
-
-   assert(buf);
-
-   dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
-   assert(dec);
+   assert(dec && buf);
 
    if(!vl_mc_init_buffer(&dec->mc_y, &buf->mc[0]))
       goto error_mc_y;
@@ -242,16 +225,103 @@ cleanup_mc_buffer(struct vl_mpeg12_buffer *buf)
 }
 
 static void
-vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer)
+vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
 {
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
-   struct vl_mpeg12_decoder *dec;
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
 
-   assert(buf);
+   assert(decoder);
+
+   /* Asserted in softpipe_delete_fs_state() for some reason */
+   dec->base.context->bind_vs_state(dec->base.context, NULL);
+   dec->base.context->bind_fs_state(dec->base.context, NULL);
+
+   dec->base.context->delete_depth_stencil_alpha_state(dec->base.context, dec->dsa);
+   dec->base.context->delete_sampler_state(dec->base.context, dec->sampler_ycbcr);
+
+   vl_mc_cleanup(&dec->mc_y);
+   vl_mc_cleanup(&dec->mc_c);
+   dec->mc_source->destroy(dec->mc_source);
+
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
+      vl_idct_cleanup(&dec->idct_y);
+      vl_idct_cleanup(&dec->idct_c);
+      dec->idct_source->destroy(dec->idct_source);
+   }
+
+   vl_zscan_cleanup(&dec->zscan_y);
+   vl_zscan_cleanup(&dec->zscan_c);
+
+   dec->base.context->delete_vertex_elements_state(dec->base.context, dec->ves_ycbcr);
+   dec->base.context->delete_vertex_elements_state(dec->base.context, dec->ves_mv);
+
+   pipe_resource_reference(&dec->quads.buffer, NULL);
+   pipe_resource_reference(&dec->pos.buffer, NULL);
+   pipe_resource_reference(&dec->block_num.buffer, NULL);
+
+   pipe_sampler_view_reference(&dec->zscan_linear, NULL);
+   pipe_sampler_view_reference(&dec->zscan_normal, NULL);
+   pipe_sampler_view_reference(&dec->zscan_alternate, NULL);
+
+   FREE(dec);
+}
+
+static void *
+vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
+{
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
+   struct vl_mpeg12_buffer *buffer;
 
-   dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
    assert(dec);
 
+   buffer = CALLOC_STRUCT(vl_mpeg12_buffer);
+   if (buffer == NULL)
+      return NULL;
+
+   if (!vl_vb_init(&buffer->vertex_stream, dec->base.context,
+                   dec->base.width / MACROBLOCK_WIDTH,
+                   dec->base.height / MACROBLOCK_HEIGHT))
+      goto error_vertex_buffer;
+
+   if (!init_mc_buffer(dec, buffer))
+      goto error_mc;
+
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
+      if (!init_idct_buffer(dec, buffer))
+         goto error_idct;
+
+   if (!init_zscan_buffer(dec, buffer))
+      goto error_zscan;
+
+   if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
+      vl_mpg12_bs_init(&buffer->bs,
+                       dec->base.width / MACROBLOCK_WIDTH,
+                       dec->base.height / MACROBLOCK_HEIGHT);
+
+   return buffer;
+
+error_zscan:
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
+      cleanup_idct_buffer(buffer);
+
+error_idct:
+   cleanup_mc_buffer(buffer);
+
+error_mc:
+   vl_vb_cleanup(&buffer->vertex_stream);
+
+error_vertex_buffer:
+   FREE(buffer);
+   return NULL;
+}
+
+static void
+vl_mpeg12_destroy_buffer(struct pipe_video_decoder *decoder, void *buffer)
+{
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
+   struct vl_mpeg12_buffer *buf = buffer;
+
+   assert(dec && buf);
+
    cleanup_zscan_buffer(buf);
 
    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
@@ -265,18 +335,96 @@ vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer)
 }
 
 static void
-vl_mpeg12_buffer_begin_frame(struct pipe_video_decode_buffer *buffer)
+vl_mpeg12_set_decode_buffer(struct pipe_video_decoder *decoder, void *buffer)
 {
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
-   struct vl_mpeg12_decoder *dec;
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
 
+   assert(dec && buffer);
+
+   dec->current_buffer = buffer;
+}
+
+static void
+vl_mpeg12_set_picture_parameters(struct pipe_video_decoder *decoder,
+                                 struct pipe_picture_desc *picture)
+{
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
+   struct pipe_mpeg12_picture_desc *pic = (struct pipe_mpeg12_picture_desc *)picture;
+
+   assert(dec && pic);
+
+   dec->picture_desc = *pic;
+}
+
+static void
+vl_mpeg12_set_quant_matrix(struct pipe_video_decoder *decoder,
+                           const uint8_t intra_matrix[64],
+                           const uint8_t non_intra_matrix[64])
+{
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
+
+   assert(dec);
+
+   memcpy(dec->intra_matrix, intra_matrix, 64);
+   memcpy(dec->non_intra_matrix, non_intra_matrix, 64);
+}
+
+static void
+vl_mpeg12_set_decode_target(struct pipe_video_decoder *decoder,
+                            struct pipe_video_buffer *target)
+{
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
+   struct pipe_surface **surfaces;
+   unsigned i;
+
+   assert(dec);
+
+   surfaces = target->get_surfaces(target);
+   for (i = 0; i < VL_MAX_PLANES; ++i)
+      pipe_surface_reference(&dec->target_surfaces[i], surfaces[i]);
+}
+
+static void
+vl_mpeg12_set_reference_frames(struct pipe_video_decoder *decoder,
+                               struct pipe_video_buffer **ref_frames,
+                               unsigned num_ref_frames)
+{
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
+   struct pipe_sampler_view **sv;
+   unsigned i,j;
+
+   assert(dec);
+   assert(num_ref_frames <= VL_MAX_REF_FRAMES);
+
+   for (i = 0; i < num_ref_frames; ++i) {
+      sv = ref_frames[i]->get_sampler_view_planes(ref_frames[i]);
+      for (j = 0; j < VL_MAX_PLANES; ++j)
+         pipe_sampler_view_reference(&dec->ref_frames[i][j], sv[j]);
+   }
+
+   for (; i < VL_MAX_REF_FRAMES; ++i)
+      for (j = 0; j < VL_MAX_PLANES; ++j)
+         pipe_sampler_view_reference(&dec->ref_frames[i][j], NULL);
+}
+
+static void
+vl_mpeg12_begin_frame(struct pipe_video_decoder *decoder)
+{
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
+
+   struct vl_mpeg12_buffer *buf;
    struct pipe_sampler_view **sampler_views;
    unsigned i;
 
+   assert(dec);
+
+   buf = dec->current_buffer;
    assert(buf);
 
-   dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
-   assert(dec);
+   for (i = 0; i < VL_MAX_PLANES; ++i) {
+      vl_zscan_upload_quant(&buf->zscan[i], dec->intra_matrix, true);
+      vl_zscan_upload_quant(&buf->zscan[i], dec->non_intra_matrix, false);
+   }
 
    vl_vb_map(&buf->vertex_stream, dec->base.context);
 
@@ -322,95 +470,84 @@ vl_mpeg12_buffer_begin_frame(struct pipe_video_decode_buffer *buffer)
    }
 }
 
-static void
-vl_mpeg12_buffer_set_quant_matrix(struct pipe_video_decode_buffer *buffer,
-                                  const uint8_t intra_matrix[64],
-                                  const uint8_t non_intra_matrix[64])
-{
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
-   unsigned i;
-
-   for (i = 0; i < VL_MAX_PLANES; ++i) {
-      vl_zscan_upload_quant(&buf->zscan[i], intra_matrix, true);
-      vl_zscan_upload_quant(&buf->zscan[i], non_intra_matrix, false);
-   }
-}
-
 static struct pipe_ycbcr_block *
-vl_mpeg12_buffer_get_ycbcr_stream(struct pipe_video_decode_buffer *buffer, int component)
+vl_mpeg12_get_ycbcr_stream(struct pipe_video_decoder *decoder, int component)
 {
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
 
-   assert(buf);
+   assert(dec && dec->current_buffer);
+   assert(component < VL_MAX_PLANES);
 
-   return vl_vb_get_ycbcr_stream(&buf->vertex_stream, component);
+   return vl_vb_get_ycbcr_stream(&dec->current_buffer->vertex_stream, component);
 }
 
 static short *
-vl_mpeg12_buffer_get_ycbcr_buffer(struct pipe_video_decode_buffer *buffer, int component)
+vl_mpeg12_get_ycbcr_buffer(struct pipe_video_decoder *decoder, int component)
 {
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
 
-   assert(buf);
+   assert(dec && dec->current_buffer);
    assert(component < VL_MAX_PLANES);
 
-   return buf->texels[component];
+   return dec->current_buffer->texels[component];
 }
 
 static unsigned
-vl_mpeg12_buffer_get_mv_stream_stride(struct pipe_video_decode_buffer *buffer)
+vl_mpeg12_get_mv_stream_stride(struct pipe_video_decoder *decoder)
 {
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
 
-   assert(buf);
+   assert(dec && dec->current_buffer);
 
-   return vl_vb_get_mv_stream_stride(&buf->vertex_stream);
+   return vl_vb_get_mv_stream_stride(&dec->current_buffer->vertex_stream);
 }
 
 static struct pipe_motionvector *
-vl_mpeg12_buffer_get_mv_stream(struct pipe_video_decode_buffer *buffer, int ref_frame)
+vl_mpeg12_get_mv_stream(struct pipe_video_decoder *decoder, int ref_frame)
 {
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
 
-   assert(buf);
+   assert(dec && dec->current_buffer);
 
-   return vl_vb_get_mv_stream(&buf->vertex_stream, ref_frame);
+   return vl_vb_get_mv_stream(&dec->current_buffer->vertex_stream, ref_frame);
 }
 
 static void
-vl_mpeg12_buffer_decode_bitstream(struct pipe_video_decode_buffer *buffer,
-                                  unsigned num_bytes, const void *data,
-                                  struct pipe_picture_desc *picture,
-                                  unsigned num_ycbcr_blocks[3])
+vl_mpeg12_decode_bitstream(struct pipe_video_decoder *decoder,
+                           unsigned num_bytes, const void *data,
+                           unsigned num_ycbcr_blocks[3])
 {
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
-   struct pipe_mpeg12_picture_desc *pic = (struct pipe_mpeg12_picture_desc *)picture;
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
+   struct vl_mpeg12_buffer *buf;
    
-   struct vl_mpeg12_decoder *dec;
    unsigned i;
 
-   assert(buf);
+   assert(dec && dec->current_buffer);
 
-   dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
-   assert(dec);
+   buf = dec->current_buffer;
+   assert(buf);
 
    for (i = 0; i < VL_MAX_PLANES; ++i)
-      vl_zscan_set_layout(&buf->zscan[i], pic->alternate_scan ? dec->zscan_alternate : dec->zscan_normal);
+      vl_zscan_set_layout(&buf->zscan[i], dec->picture_desc.alternate_scan ?
+                          dec->zscan_alternate : dec->zscan_normal);
 
-   vl_mpg12_bs_decode(&buf->bs, num_bytes, data, pic, num_ycbcr_blocks);
+   vl_mpg12_bs_decode(&buf->bs, num_bytes, data, &dec->picture_desc, num_ycbcr_blocks);
 }
 
 static void
-vl_mpeg12_buffer_end_frame(struct pipe_video_decode_buffer *buffer)
+vl_mpeg12_end_frame(struct pipe_video_decoder *decoder, unsigned num_ycbcr_blocks[3])
 {
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
-   struct vl_mpeg12_decoder *dec;
-   unsigned i;
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
+   struct pipe_sampler_view **mc_source_sv;
+   struct pipe_vertex_buffer vb[3];
+   struct vl_mpeg12_buffer *buf;
 
-   assert(buf);
+   unsigned i, j, component;
+   unsigned nr_components;
 
-   dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
-   assert(dec);
+   assert(dec && dec->current_buffer);
+
+   buf = dec->current_buffer;
 
    vl_vb_unmap(&buf->vertex_stream, dec->base.context);
 
@@ -418,152 +555,23 @@ vl_mpeg12_buffer_end_frame(struct pipe_video_decode_buffer *buffer)
       dec->base.context->transfer_unmap(dec->base.context, buf->tex_transfer[i]);
       dec->base.context->transfer_destroy(dec->base.context, buf->tex_transfer[i]);
    }
-}
-
-static void
-vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
-{
-   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
-
-   assert(decoder);
-
-   /* Asserted in softpipe_delete_fs_state() for some reason */
-   dec->base.context->bind_vs_state(dec->base.context, NULL);
-   dec->base.context->bind_fs_state(dec->base.context, NULL);
-
-   dec->base.context->delete_depth_stencil_alpha_state(dec->base.context, dec->dsa);
-   dec->base.context->delete_sampler_state(dec->base.context, dec->sampler_ycbcr);
-
-   vl_mc_cleanup(&dec->mc_y);
-   vl_mc_cleanup(&dec->mc_c);
-   dec->mc_source->destroy(dec->mc_source);
-
-   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
-      vl_idct_cleanup(&dec->idct_y);
-      vl_idct_cleanup(&dec->idct_c);
-      dec->idct_source->destroy(dec->idct_source);
-   }
-
-   vl_zscan_cleanup(&dec->zscan_y);
-   vl_zscan_cleanup(&dec->zscan_c);
-
-   dec->base.context->delete_vertex_elements_state(dec->base.context, dec->ves_ycbcr);
-   dec->base.context->delete_vertex_elements_state(dec->base.context, dec->ves_mv);
-
-   pipe_resource_reference(&dec->quads.buffer, NULL);
-   pipe_resource_reference(&dec->pos.buffer, NULL);
-   pipe_resource_reference(&dec->block_num.buffer, NULL);
-
-   pipe_sampler_view_reference(&dec->zscan_linear, NULL);
-   pipe_sampler_view_reference(&dec->zscan_normal, NULL);
-   pipe_sampler_view_reference(&dec->zscan_alternate, NULL);
-
-   FREE(dec);
-}
-
-static struct pipe_video_decode_buffer *
-vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
-{
-   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
-   struct vl_mpeg12_buffer *buffer;
-
-   assert(dec);
-
-   buffer = CALLOC_STRUCT(vl_mpeg12_buffer);
-   if (buffer == NULL)
-      return NULL;
-
-   buffer->base.decoder = decoder;
-   buffer->base.destroy = vl_mpeg12_buffer_destroy;
-   buffer->base.begin_frame = vl_mpeg12_buffer_begin_frame;
-   buffer->base.set_quant_matrix = vl_mpeg12_buffer_set_quant_matrix;
-   buffer->base.get_ycbcr_stream = vl_mpeg12_buffer_get_ycbcr_stream;
-   buffer->base.get_ycbcr_buffer = vl_mpeg12_buffer_get_ycbcr_buffer;
-   buffer->base.get_mv_stream_stride = vl_mpeg12_buffer_get_mv_stream_stride;
-   buffer->base.get_mv_stream = vl_mpeg12_buffer_get_mv_stream;
-   buffer->base.decode_bitstream = vl_mpeg12_buffer_decode_bitstream;
-   buffer->base.end_frame = vl_mpeg12_buffer_end_frame;
-
-   if (!vl_vb_init(&buffer->vertex_stream, dec->base.context,
-                   dec->base.width / MACROBLOCK_WIDTH,
-                   dec->base.height / MACROBLOCK_HEIGHT))
-      goto error_vertex_buffer;
-
-   if (!init_mc_buffer(buffer))
-      goto error_mc;
-
-   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
-      if (!init_idct_buffer(buffer))
-         goto error_idct;
-
-   if (!init_zscan_buffer(buffer))
-      goto error_zscan;
-
-   if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
-      vl_mpg12_bs_init(&buffer->bs,
-                       dec->base.width / MACROBLOCK_WIDTH,
-                       dec->base.height / MACROBLOCK_HEIGHT);
-
-   return &buffer->base;
-
-error_zscan:
-   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
-      cleanup_idct_buffer(buffer);
-
-error_idct:
-   cleanup_mc_buffer(buffer);
-
-error_mc:
-   vl_vb_cleanup(&buffer->vertex_stream);
-
-error_vertex_buffer:
-   FREE(buffer);
-   return NULL;
-}
-
-static void
-vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
-                               unsigned num_ycbcr_blocks[3],
-                               struct pipe_video_buffer *refs[2],
-                               struct pipe_video_buffer *dst)
-{
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer *)buffer;
-   struct vl_mpeg12_decoder *dec;
-
-   struct pipe_sampler_view **sv[VL_MAX_REF_FRAMES], **mc_source_sv;
-   struct pipe_surface **surfaces;
-
-   struct pipe_vertex_buffer vb[3];
-
-   unsigned i, j, component;
-   unsigned nr_components;
-
-   assert(buf);
-
-   dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
-   assert(dec);
-
-   for (i = 0; i < 2; ++i)
-      sv[i] = refs[i] ? refs[i]->get_sampler_view_planes(refs[i]) : NULL;
 
    vb[0] = dec->quads;
    vb[1] = dec->pos;
 
-   surfaces = dst->get_surfaces(dst);
-
    dec->base.context->bind_vertex_elements_state(dec->base.context, dec->ves_mv);
    for (i = 0; i < VL_MAX_PLANES; ++i) {
-      if (!surfaces[i]) continue;
+      if (!dec->target_surfaces[i]) continue;
 
-      vl_mc_set_surface(&buf->mc[i], surfaces[i]);
+      vl_mc_set_surface(&buf->mc[i], dec->target_surfaces[i]);
 
       for (j = 0; j < VL_MAX_REF_FRAMES; ++j) {
-         if (!sv[j]) continue;
+         if (!dec->ref_frames[j][i]) continue;
 
          vb[2] = vl_vb_get_mv(&buf->vertex_stream, j);;
          dec->base.context->set_vertex_buffers(dec->base.context, 3, vb);
 
-         vl_mc_render_ref(&buf->mc[i], sv[j][i]);
+         vl_mc_render_ref(&buf->mc[i], dec->ref_frames[j][i]);
       }
    }
 
@@ -584,9 +592,9 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
 
    mc_source_sv = dec->mc_source->get_sampler_view_planes(dec->mc_source);
    for (i = 0, component = 0; i < VL_MAX_PLANES; ++i) {
-      if (!surfaces[i]) continue;
+      if (!dec->target_surfaces[i]) continue;
 
-      nr_components = util_format_get_nr_components(surfaces[i]->texture->format);
+      nr_components = util_format_get_nr_components(dec->target_surfaces[i]->texture->format);
       for (j = 0; j < nr_components; ++j, ++component) {
          if (!num_ycbcr_blocks[i]) continue;
 
@@ -604,6 +612,14 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
    }
 }
 
+static void
+vl_mpeg12_flush(struct pipe_video_decoder *decoder)
+{
+   assert(decoder);
+
+   //Noop, for shaders it is much faster to flush everything in end_frame
+}
+
 static bool
 init_pipe_state(struct vl_mpeg12_decoder *dec)
 {
@@ -870,7 +886,20 @@ vl_create_mpeg12_decoder(struct pipe_context *context,
 
    dec->base.destroy = vl_mpeg12_destroy;
    dec->base.create_buffer = vl_mpeg12_create_buffer;
-   dec->base.flush_buffer = vl_mpeg12_decoder_flush_buffer;
+   dec->base.destroy_buffer = vl_mpeg12_destroy_buffer;
+   dec->base.set_decode_buffer = vl_mpeg12_set_decode_buffer;
+   dec->base.set_picture_parameters = vl_mpeg12_set_picture_parameters;
+   dec->base.set_quant_matrix = vl_mpeg12_set_quant_matrix;
+   dec->base.set_decode_target = vl_mpeg12_set_decode_target;
+   dec->base.set_reference_frames = vl_mpeg12_set_reference_frames;
+   dec->base.begin_frame = vl_mpeg12_begin_frame;
+   dec->base.get_ycbcr_stream = vl_mpeg12_get_ycbcr_stream;
+   dec->base.get_ycbcr_buffer = vl_mpeg12_get_ycbcr_buffer;
+   dec->base.get_mv_stream_stride = vl_mpeg12_get_mv_stream_stride;
+   dec->base.get_mv_stream = vl_mpeg12_get_mv_stream;
+   dec->base.decode_bitstream = vl_mpeg12_decode_bitstream;
+   dec->base.end_frame = vl_mpeg12_end_frame;
+   dec->base.flush = vl_mpeg12_flush;
 
    dec->blocks_per_line = MAX2(util_next_power_of_two(dec->base.width) / block_size_pixels, 4);
    dec->num_blocks = (dec->base.width * dec->base.height) / block_size_pixels;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index 01265e368a3..85c84fc1c4a 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -73,12 +73,17 @@ struct vl_mpeg12_decoder
    struct vl_mc mc_y, mc_c;
 
    void *dsa;
+
+   struct vl_mpeg12_buffer *current_buffer;
+   struct pipe_mpeg12_picture_desc picture_desc;
+   uint8_t intra_matrix[64];
+   uint8_t non_intra_matrix[64];
+   struct pipe_sampler_view *ref_frames[VL_MAX_REF_FRAMES][VL_MAX_PLANES];
+   struct pipe_surface *target_surfaces[VL_MAX_PLANES];
 };
 
 struct vl_mpeg12_buffer
 {
-   struct pipe_video_decode_buffer base;
-
    struct vl_vertex_buffer vertex_stream;
 
    struct pipe_video_buffer *zscan_source;
diff --git a/src/gallium/include/pipe/p_video_decoder.h b/src/gallium/include/pipe/p_video_decoder.h
index f063d8f3a1b..ae071136bac 100644
--- a/src/gallium/include/pipe/p_video_decoder.h
+++ b/src/gallium/include/pipe/p_video_decoder.h
@@ -59,75 +59,89 @@ struct pipe_video_decoder
    void (*destroy)(struct pipe_video_decoder *decoder);
 
    /**
-    * Creates a buffer as decoding input
+    * Creates a decoder buffer
     */
-   struct pipe_video_decode_buffer *(*create_buffer)(struct pipe_video_decoder *decoder);
+   void *(*create_buffer)(struct pipe_video_decoder *decoder);
 
    /**
-    * flush decoder buffer to video hardware
+    * Destroys a decoder buffer
     */
-   void (*flush_buffer)(struct pipe_video_decode_buffer *decbuf,
-                        unsigned num_ycbcr_blocks[3],
-                        struct pipe_video_buffer *ref_frames[2],
-                        struct pipe_video_buffer *dst);
-};
-
-/**
- * input buffer for a decoder
- */
-struct pipe_video_decode_buffer
-{
-   struct pipe_video_decoder *decoder;
+   void (*destroy_buffer)(struct pipe_video_decoder *decoder, void *buffer);
 
    /**
-    * destroy this decode buffer
+    * set the current decoder buffer
     */
-   void (*destroy)(struct pipe_video_decode_buffer *decbuf);
+   void (*set_decode_buffer)(struct pipe_video_decoder *decoder, void *buffer);
 
    /**
-    * map the input buffer into memory before starting decoding
+    * set the picture parameters for the next frame
+    * only used for bitstream decoding
     */
-   void (*begin_frame)(struct pipe_video_decode_buffer *decbuf);
+   void (*set_picture_parameters)(struct pipe_video_decoder *decoder,
+                                  struct pipe_picture_desc *picture);
 
    /**
     * set the quantification matrixes
     */
-   void (*set_quant_matrix)(struct pipe_video_decode_buffer *decbuf,
+   void (*set_quant_matrix)(struct pipe_video_decoder *decoder,
                             const uint8_t intra_matrix[64],
                             const uint8_t non_intra_matrix[64]);
 
+   /**
+    * set target where video data is decoded to
+    */
+   void (*set_decode_target)(struct pipe_video_decoder *decoder,
+                             struct pipe_video_buffer *target);
+
+   /**
+    * set reference frames for motion compensation
+    */
+   void (*set_reference_frames)(struct pipe_video_decoder *decoder,
+                                struct pipe_video_buffer **ref_frames,
+                                unsigned num_ref_frames);
+
+   /**
+    * start decoding of a new frame
+    */
+   void (*begin_frame)(struct pipe_video_decoder *decoder);
+
    /**
     * get the pointer where to put the ycbcr blocks of a component
     */
-   struct pipe_ycbcr_block *(*get_ycbcr_stream)(struct pipe_video_decode_buffer *, int component);
+   struct pipe_ycbcr_block *(*get_ycbcr_stream)(struct pipe_video_decoder *decoder, int component);
 
    /**
     * get the pointer where to put the ycbcr dct block data of a component
     */
-   short *(*get_ycbcr_buffer)(struct pipe_video_decode_buffer *, int component);
+   short *(*get_ycbcr_buffer)(struct pipe_video_decoder *decoder, int component);
 
    /**
     * get the stride of the mv buffer
     */
-   unsigned (*get_mv_stream_stride)(struct pipe_video_decode_buffer *decbuf);
+   unsigned (*get_mv_stream_stride)(struct pipe_video_decoder *decoder);
 
    /**
     * get the pointer where to put the motion vectors of a ref frame
     */
-   struct pipe_motionvector *(*get_mv_stream)(struct pipe_video_decode_buffer *decbuf, int ref_frame);
+   struct pipe_motionvector *(*get_mv_stream)(struct pipe_video_decoder *decoder, int ref_frame);
 
    /**
     * decode a bitstream
     */
-   void (*decode_bitstream)(struct pipe_video_decode_buffer *decbuf,
+   void (*decode_bitstream)(struct pipe_video_decoder *decoder,
                             unsigned num_bytes, const void *data,
-                            struct pipe_picture_desc *picture,
                             unsigned num_ycbcr_blocks[3]);
 
    /**
-    * unmap decoder buffer before flushing
+    * end decoding of the current frame
     */
-   void (*end_frame)(struct pipe_video_decode_buffer *decbuf);
+   void (*end_frame)(struct pipe_video_decoder *decoder, unsigned num_ycbcr_blocks[3]);
+
+   /**
+    * flush any outstanding command buffers to the hardware
+    * should be called before a video_buffer is acessed by the state tracker again
+    */
+   void (*flush)(struct pipe_video_decoder *decoder);
 };
 
 /**
diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 96542f874d9..3bf05bea21f 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -107,7 +107,7 @@ error_buffer:
 
    for (i = 0; i < VL_NUM_DECODE_BUFFERS; ++i)
       if (vldecoder->buffer[i])
-         vldecoder->buffer[i]->destroy(vldecoder->buffer[i]);
+         vldecoder->decoder->destroy_buffer(vldecoder->decoder, vldecoder->buffer[i]);
 
    vldecoder->decoder->destroy(vldecoder->decoder);
 
@@ -130,7 +130,7 @@ vlVdpDecoderDestroy(VdpDecoder decoder)
 
    for (i = 0; i < VL_NUM_DECODE_BUFFERS; ++i)
       if (vldecoder->buffer[i])
-         vldecoder->buffer[i]->destroy(vldecoder->buffer[i]);
+         vldecoder->decoder->destroy_buffer(vldecoder->decoder, vldecoder->buffer[i]);
 
    vldecoder->decoder->destroy(vldecoder->decoder);
 
@@ -162,8 +162,6 @@ vlVdpDecoderGetParameters(VdpDecoder decoder,
 
 static VdpStatus
 vlVdpDecoderRenderMpeg12(struct pipe_video_decoder *decoder,
-                         struct pipe_video_decode_buffer *buffer,
-                         struct pipe_video_buffer *target,
                          VdpPictureInfoMPEG1Or2 *picture_info,
                          uint32_t bitstream_buffer_count,
                          VdpBitstreamBuffer const *bitstream_buffers)
@@ -176,23 +174,25 @@ vlVdpDecoderRenderMpeg12(struct pipe_video_decoder *decoder,
 
    VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Decoding MPEG2\n");
 
+   i = 0;
+
    /* if surfaces equals VDP_STATUS_INVALID_HANDLE, they are not used */
-   if (picture_info->forward_reference ==  VDP_INVALID_HANDLE)
-      ref_frames[0] = NULL;
-   else {
-      ref_frames[0] = ((vlVdpSurface *)vlGetDataHTAB(picture_info->forward_reference))->video_buffer;
-      if (!ref_frames[0])
+   if (picture_info->forward_reference !=  VDP_INVALID_HANDLE) {
+      ref_frames[i] = ((vlVdpSurface *)vlGetDataHTAB(picture_info->forward_reference))->video_buffer;
+      if (!ref_frames[i])
          return VDP_STATUS_INVALID_HANDLE;
+      ++i;
    }
 
-   if (picture_info->backward_reference ==  VDP_INVALID_HANDLE)
-      ref_frames[1] = NULL;
-   else {
-      ref_frames[1] = ((vlVdpSurface *)vlGetDataHTAB(picture_info->backward_reference))->video_buffer;
-      if (!ref_frames[1])
+   if (picture_info->backward_reference !=  VDP_INVALID_HANDLE) {
+      ref_frames[i] = ((vlVdpSurface *)vlGetDataHTAB(picture_info->backward_reference))->video_buffer;
+      if (!ref_frames[i])
          return VDP_STATUS_INVALID_HANDLE;
+      ++i;
    }
 
+   decoder->set_reference_frames(decoder, ref_frames, i);
+
    memset(&picture, 0, sizeof(picture));
    picture.base.profile = decoder->profile;
    picture.picture_coding_type = picture_info->picture_coding_type;
@@ -207,19 +207,19 @@ vlVdpDecoderRenderMpeg12(struct pipe_video_decoder *decoder,
    picture.f_code[1][0] = picture_info->f_code[1][0] - 1;
    picture.f_code[1][1] = picture_info->f_code[1][1] - 1;
 
-   buffer->begin_frame(buffer);
+   decoder->set_picture_parameters(decoder, &picture.base);
 
    memcpy(intra_quantizer_matrix, picture_info->intra_quantizer_matrix, sizeof(intra_quantizer_matrix));
    intra_quantizer_matrix[0] = 1 << (7 - picture_info->intra_dc_precision);
-   buffer->set_quant_matrix(buffer, intra_quantizer_matrix, picture_info->non_intra_quantizer_matrix);
+   decoder->set_quant_matrix(decoder, intra_quantizer_matrix, picture_info->non_intra_quantizer_matrix);
+
+   decoder->begin_frame(decoder);
 
    for (i = 0; i < bitstream_buffer_count; ++i)
-      buffer->decode_bitstream(buffer, bitstream_buffers[i].bitstream_bytes,
-                               bitstream_buffers[i].bitstream, &picture.base, num_ycbcr_blocks);
+      decoder->decode_bitstream(decoder, bitstream_buffers[i].bitstream_bytes,
+                                bitstream_buffers[i].bitstream, num_ycbcr_blocks);
 
-   buffer->end_frame(buffer);
-
-   decoder->flush_buffer(buffer, num_ycbcr_blocks, ref_frames, target);
+   decoder->end_frame(decoder, num_ycbcr_blocks);
 
    return VDP_STATUS_OK;
 }
@@ -261,11 +261,12 @@ vlVdpDecoderRender(VdpDecoder decoder,
    case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
       ++vldecoder->cur_buffer;
       vldecoder->cur_buffer %= VL_NUM_DECODE_BUFFERS;
-      return vlVdpDecoderRenderMpeg12(vldecoder->decoder,
-                                      vldecoder->buffer[vldecoder->cur_buffer],
-                                      vlsurf->video_buffer,
-                                      (VdpPictureInfoMPEG1Or2 *)picture_info,
-                                      bitstream_buffer_count,bitstream_buffers);
+
+      vldecoder->decoder->set_decode_buffer(vldecoder->decoder, vldecoder->buffer[vldecoder->cur_buffer]);
+      vldecoder->decoder->set_decode_target(vldecoder->decoder, vlsurf->video_buffer);
+
+      return vlVdpDecoderRenderMpeg12(vldecoder->decoder, (VdpPictureInfoMPEG1Or2 *)picture_info,
+                                      bitstream_buffer_count, bitstream_buffers);
       break;
 
    default:
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index e5d945629fb..5c68cd7c47a 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -256,7 +256,7 @@ typedef struct
 {
    vlVdpDevice *device;
    struct pipe_video_decoder *decoder;
-   struct pipe_video_decode_buffer *buffer[VL_NUM_DECODE_BUFFERS];
+   void *buffer[VL_NUM_DECODE_BUFFERS];
    unsigned cur_buffer;
 } vlVdpDecoder;
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 0c53b730287..002c35ae445 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -252,9 +252,37 @@ MacroBlocksToPipe(XvMCSurfacePrivate *surface,
 }
 
 static void
-unmap_and_flush_surface(XvMCSurfacePrivate *surface)
+SetDecoderStatus(XvMCSurfacePrivate *surface)
 {
+   struct pipe_video_decoder *decoder;
    struct pipe_video_buffer *ref_frames[2];
+
+   XvMCContextPrivate *context_priv;
+
+   unsigned i, num_refs = 0;
+
+   assert(surface);
+
+   context_priv = surface->context->privData;
+   decoder = context_priv->decoder;
+
+   decoder->set_decode_buffer(decoder, surface->decode_buffer);
+   decoder->set_decode_target(decoder, surface->video_buffer);
+
+   for (i = 0; i < 2; ++i) {
+      if (surface->ref[i].surface) {
+         XvMCSurfacePrivate *ref = surface->ref[i].surface->privData;
+
+         if (ref)
+            ref_frames[num_refs++] = ref->video_buffer;
+      }
+   }
+   decoder->set_reference_frames(decoder, ref_frames, num_refs);
+}
+
+static void
+RecursiveEndFrame(XvMCSurfacePrivate *surface)
+{
    XvMCContextPrivate *context_priv;
    unsigned i, num_ycbcr_blocks[3];
 
@@ -264,27 +292,27 @@ unmap_and_flush_surface(XvMCSurfacePrivate *surface)
 
    for ( i = 0; i < 2; ++i ) {
       if (surface->ref[i].surface) {
-         XvMCSurfacePrivate *ref = surface->ref[i].surface->privData;
+         XvMCSurface *ref = surface->ref[i].surface;
 
          assert(ref);
 
-         unmap_and_flush_surface(ref);
          surface->ref[i].surface = NULL;
-         ref_frames[i] = ref->video_buffer;
-      } else {
-         ref_frames[i] = NULL;
+         RecursiveEndFrame(ref->privData);
+         surface->ref[i].surface = ref;
       }
    }
 
-   if (surface->mapped) {
-      surface->decode_buffer->end_frame(surface->decode_buffer);
+   if (surface->frame_started) {
+      surface->frame_started = 0;
+      SetDecoderStatus(surface);
+
       for (i = 0; i < 3; ++i)
          num_ycbcr_blocks[i] = surface->ycbcr[i].num_blocks_added;
-      context_priv->decoder->flush_buffer(surface->decode_buffer,
-                                          num_ycbcr_blocks,
-                                          ref_frames,
-                                          surface->video_buffer);
-      surface->mapped = 0;
+
+      for (i = 0; i < 2; ++i)
+         surface->ref[i].surface = NULL;
+
+      context_priv->decoder->end_frame(context_priv->decoder, num_ycbcr_blocks);
    }
 }
 
@@ -323,9 +351,7 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
       return BadAlloc;
 
    surface_priv->decode_buffer = context_priv->decoder->create_buffer(context_priv->decoder);
-   surface_priv->decode_buffer->set_quant_matrix(surface_priv->decode_buffer, dummy_quant, dummy_quant);
-
-   surface_priv->mv_stride = surface_priv->decode_buffer->get_mv_stream_stride(surface_priv->decode_buffer);
+   context_priv->decoder->set_quant_matrix(context_priv->decoder, dummy_quant, dummy_quant);
    surface_priv->video_buffer = pipe->create_video_buffer
    (
       pipe, PIPE_FORMAT_NV12, context_priv->decoder->chroma_format,
@@ -355,8 +381,9 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
                          XvMCMacroBlockArray *macroblocks, XvMCBlockArray *blocks
 )
 {
-   struct pipe_video_decode_buffer *t_buffer;
+   struct pipe_video_decoder *decoder;
 
+   XvMCContextPrivate *context_priv;
    XvMCSurfacePrivate *target_surface_priv;
    XvMCSurfacePrivate *past_surface_priv;
    XvMCSurfacePrivate *future_surface_priv;
@@ -394,6 +421,9 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
 
    assert(flags == 0 || flags == XVMC_SECOND_FIELD);
 
+   context_priv = context->privData;
+   decoder = context_priv->decoder;
+
    target_surface_priv = target_surface->privData;
    past_surface_priv = past_surface ? past_surface->privData : NULL;
    future_surface_priv = future_surface ? future_surface->privData : NULL;
@@ -402,47 +432,48 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
    assert(!past_surface || past_surface_priv->context == context);
    assert(!future_surface || future_surface_priv->context == context);
 
-   t_buffer = target_surface_priv->decode_buffer;
-
-   // enshure that all reference frames are flushed
-   // not really nessasary, but speeds ups rendering
+   // call end frame on all referenced frames
    if (past_surface)
-      unmap_and_flush_surface(past_surface->privData);
+      RecursiveEndFrame(past_surface->privData);
 
    if (future_surface)
-      unmap_and_flush_surface(future_surface->privData);
+      RecursiveEndFrame(future_surface->privData);
 
    xvmc_mb = macroblocks->macro_blocks + first_macroblock;
 
    /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
-   if (target_surface_priv->mapped && (
+   if (target_surface_priv->frame_started && (
        target_surface_priv->ref[0].surface != past_surface ||
        target_surface_priv->ref[1].surface != future_surface ||
        (xvmc_mb->x == 0 && xvmc_mb->y == 0))) {
 
-      // If they change anyway we need to clear our surface
-      unmap_and_flush_surface(target_surface_priv);
+      // If they change anyway we must assume that the current frame is ended
+      RecursiveEndFrame(target_surface_priv);
    }
 
-   if (!target_surface_priv->mapped) {
-      t_buffer->begin_frame(t_buffer);
+   target_surface_priv->ref[0].surface = past_surface;
+   target_surface_priv->ref[1].surface = future_surface;
 
+   SetDecoderStatus(target_surface_priv);
+
+   if (!target_surface_priv->frame_started) {
+      decoder->begin_frame(decoder);
+
+      target_surface_priv->mv_stride = decoder->get_mv_stream_stride(decoder);
       for (i = 0; i < 3; ++i) {
          target_surface_priv->ycbcr[i].num_blocks_added = 0;
-         target_surface_priv->ycbcr[i].stream = t_buffer->get_ycbcr_stream(t_buffer, i);
-         target_surface_priv->ycbcr[i].buffer = t_buffer->get_ycbcr_buffer(t_buffer, i);
+         target_surface_priv->ycbcr[i].stream = decoder->get_ycbcr_stream(decoder, i);
+         target_surface_priv->ycbcr[i].buffer = decoder->get_ycbcr_buffer(decoder, i);
       }
 
       for (i = 0; i < 2; ++i) {
-         target_surface_priv->ref[i].surface = i == 0 ? past_surface : future_surface;
-
          if (target_surface_priv->ref[i].surface)
-            target_surface_priv->ref[i].mv = t_buffer->get_mv_stream(t_buffer, i);
+            target_surface_priv->ref[i].mv = decoder->get_mv_stream(decoder, i);
          else
             target_surface_priv->ref[i].mv = NULL;
       }
 
-      target_surface_priv->mapped = 1;
+      target_surface_priv->frame_started = 1;
    }
 
    MacroBlocksToPipe(target_surface_priv, picture_structure, xvmc_mb, blocks, num_macroblocks);
@@ -543,7 +574,9 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    assert(desty + desth - 1 < drawable_surface->height);
     */
 
-   unmap_and_flush_surface(surface_priv);
+   RecursiveEndFrame(surface_priv);
+
+   context_priv->decoder->flush(context_priv->decoder);
 
    vl_compositor_clear_layers(compositor);
    vl_compositor_set_buffer_layer(compositor, 0, surface_priv->video_buffer, &src_rect, NULL);
@@ -630,6 +663,9 @@ PUBLIC
 Status XvMCDestroySurface(Display *dpy, XvMCSurface *surface)
 {
    XvMCSurfacePrivate *surface_priv;
+   XvMCContextPrivate *context_priv;
+
+   unsigned num_ycbcr_buffers[3] = { 0, 0, 0 };
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Destroying surface %p.\n", surface);
 
@@ -639,10 +675,13 @@ Status XvMCDestroySurface(Display *dpy, XvMCSurface *surface)
       return XvMCBadSurface;
 
    surface_priv = surface->privData;
+   context_priv = surface_priv->context->privData;
    
-   if (surface_priv->mapped)
-      surface_priv->decode_buffer->end_frame(surface_priv->decode_buffer);
-   surface_priv->decode_buffer->destroy(surface_priv->decode_buffer);
+   if (surface_priv->frame_started) {
+      SetDecoderStatus(surface_priv);
+      context_priv->decoder->end_frame(context_priv->decoder, num_ycbcr_buffers);
+   }
+   context_priv->decoder->destroy_buffer(context_priv->decoder, surface_priv->decode_buffer);
    surface_priv->video_buffer->destroy(surface_priv->video_buffer);
    FREE(surface_priv);
    surface->privData = NULL;
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index 5f8d9d13cb3..5b3debdb78b 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -45,7 +45,6 @@
 struct vl_context;
 
 struct pipe_video_decoder;
-struct pipe_video_decode_buffer;
 struct pipe_video_buffer;
 
 struct pipe_sampler_view;
@@ -70,10 +69,11 @@ typedef struct
 
 typedef struct
 {
-   struct pipe_video_decode_buffer *decode_buffer;
+   void *decode_buffer;
    struct pipe_video_buffer *video_buffer;
 
-   bool mapped; // are we still mapped to memory?
+   // have we allready told the decoder to start a frame
+   bool frame_started;
 
    struct {
       unsigned num_blocks_added;

From 231fce7d630bcf6aaf0e435e461ad5af842e437f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <deathsimple@vodafone.de>
Date: Tue, 9 Aug 2011 19:27:57 +0200
Subject: [PATCH 582/600] g3dvl: Rework the decoder interface part 2/5
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement PIPE_CAP_NUM_BUFFERS_DESIRED giving the decoder control over
the number of buffers a state tracker should allocate.

Signed-off-by: Christian König <deathsimple@vodafone.de>
Reviewed-by: Younes Manton <younes.m@gmail.com>
---
 src/gallium/auxiliary/vl/vl_decoder.c         | 13 ++++++
 src/gallium/auxiliary/vl/vl_decoder.h         |  6 +++
 src/gallium/drivers/nouveau/nouveau_video.c   |  2 +
 src/gallium/drivers/nvfx/nvfx_screen.c        |  2 +
 src/gallium/drivers/r300/r300_screen.c        |  2 +
 src/gallium/drivers/r600/r600_pipe.c          |  2 +
 src/gallium/drivers/softpipe/sp_screen.c      |  2 +
 src/gallium/include/pipe/p_video_enums.h      |  3 +-
 src/gallium/state_trackers/vdpau/decode.c     | 41 +++++++++++++------
 .../state_trackers/vdpau/vdpau_private.h      |  4 +-
 10 files changed, 61 insertions(+), 16 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_decoder.c b/src/gallium/auxiliary/vl/vl_decoder.c
index fac03359a0f..b23827d300a 100644
--- a/src/gallium/auxiliary/vl/vl_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_decoder.c
@@ -44,6 +44,19 @@ vl_profile_supported(struct pipe_screen *screen, enum pipe_video_profile profile
    }
 }
 
+unsigned
+vl_num_buffers_desired(struct pipe_screen *screen, enum pipe_video_profile profile)
+{
+   assert(screen);
+   switch (u_reduce_video_profile(profile)) {
+      case PIPE_VIDEO_CODEC_MPEG12:
+         return 4;
+
+      default:
+         return 1;
+   }
+}
+
 struct pipe_video_decoder *
 vl_create_decoder(struct pipe_context *pipe,
                   enum pipe_video_profile profile,
diff --git a/src/gallium/auxiliary/vl/vl_decoder.h b/src/gallium/auxiliary/vl/vl_decoder.h
index 0e9280dbfa2..fed529c9bc7 100644
--- a/src/gallium/auxiliary/vl/vl_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_decoder.h
@@ -37,6 +37,12 @@
 bool
 vl_profile_supported(struct pipe_screen *screen, enum pipe_video_profile profile);
 
+/**
+ * the desired number of buffers for optimal operation
+ */
+unsigned
+vl_num_buffers_desired(struct pipe_screen *screen, enum pipe_video_profile profile);
+
 /**
  * standard implementation of pipe->create_video_decoder
  */
diff --git a/src/gallium/drivers/nouveau/nouveau_video.c b/src/gallium/drivers/nouveau/nouveau_video.c
index 32f038dae61..620c030e112 100644
--- a/src/gallium/drivers/nouveau/nouveau_video.c
+++ b/src/gallium/drivers/nouveau/nouveau_video.c
@@ -18,6 +18,8 @@ nouveau_screen_get_video_param(struct pipe_screen *pscreen,
    case PIPE_VIDEO_CAP_MAX_WIDTH:
    case PIPE_VIDEO_CAP_MAX_HEIGHT:
       return vl_video_buffer_max_size(pscreen);
+   case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED:
+      return vl_num_buffers_desired(pscreen, profile);
    default:
       debug_printf("unknown video param: %d\n", param);
       return 0;
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c
index 0e8f96772c6..3b77c9600c6 100644
--- a/src/gallium/drivers/nvfx/nvfx_screen.c
+++ b/src/gallium/drivers/nvfx/nvfx_screen.c
@@ -226,6 +226,8 @@ nvfx_screen_get_video_param(struct pipe_screen *screen,
 	case PIPE_VIDEO_CAP_MAX_WIDTH:
 	case PIPE_VIDEO_CAP_MAX_HEIGHT:
 		return vl_video_buffer_max_size(screen);
+	case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED:
+		return vl_num_buffers_desired(screen, profile);
 	default:
 		return 0;
 	}
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 13d25ba7dba..8c0500c7dfd 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -313,6 +313,8 @@ static int r300_get_video_param(struct pipe_screen *screen,
       case PIPE_VIDEO_CAP_MAX_WIDTH:
       case PIPE_VIDEO_CAP_MAX_HEIGHT:
          return vl_video_buffer_max_size(screen);
+      case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED:
+         return vl_num_buffers_desired(screen, profile);
       default:
          return 0;
    }
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index d180e36aa16..ceaebbb4431 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -505,6 +505,8 @@ static int r600_get_video_param(struct pipe_screen *screen,
 	case PIPE_VIDEO_CAP_MAX_WIDTH:
 	case PIPE_VIDEO_CAP_MAX_HEIGHT:
 		return vl_video_buffer_max_size(screen);
+	case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED:
+		return vl_num_buffers_desired(screen, profile);
 	default:
 		return 0;
 	}
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 1e58d27be88..960ab8c96ae 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -185,6 +185,8 @@ softpipe_get_video_param(struct pipe_screen *screen,
    case PIPE_VIDEO_CAP_MAX_WIDTH:
    case PIPE_VIDEO_CAP_MAX_HEIGHT:
       return vl_video_buffer_max_size(screen);
+   case PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED:
+      return vl_num_buffers_desired(screen, profile);
    default:
       return 0;
    }
diff --git a/src/gallium/include/pipe/p_video_enums.h b/src/gallium/include/pipe/p_video_enums.h
index 13786067d53..ea25a25883d 100644
--- a/src/gallium/include/pipe/p_video_enums.h
+++ b/src/gallium/include/pipe/p_video_enums.h
@@ -50,7 +50,8 @@ enum pipe_video_cap
    PIPE_VIDEO_CAP_SUPPORTED = 0,
    PIPE_VIDEO_CAP_NPOT_TEXTURES = 1,
    PIPE_VIDEO_CAP_MAX_WIDTH = 2,
-   PIPE_VIDEO_CAP_MAX_HEIGHT = 3
+   PIPE_VIDEO_CAP_MAX_HEIGHT = 3,
+   PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED = 4
 };
 
 enum pipe_video_codec
diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 3bf05bea21f..3527f733809 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -82,13 +82,22 @@ vlVdpDecoderCreate(VdpDevice device,
       goto error_decoder;
    }
 
+   vldecoder->num_buffers = pipe->screen->get_video_param
+   (
+      pipe->screen, p_profile,
+      PIPE_VIDEO_CAP_NUM_BUFFERS_DESIRED
+   );
    vldecoder->cur_buffer = 0;
 
-   for (i = 0; i < VL_NUM_DECODE_BUFFERS; ++i) {
-      vldecoder->buffer[i] = vldecoder->decoder->create_buffer(vldecoder->decoder);
-      if (!vldecoder->buffer[i]) {
+   vldecoder->buffers = CALLOC(vldecoder->num_buffers, sizeof(void*));
+   if (!vldecoder->buffers)
+         goto error_alloc_buffers;
+
+   for (i = 0; i < vldecoder->num_buffers; ++i) {
+      vldecoder->buffers[i] = vldecoder->decoder->create_buffer(vldecoder->decoder);
+      if (!vldecoder->buffers[i]) {
          ret = VDP_STATUS_ERROR;
-         goto error_buffer;
+         goto error_create_buffers;
       }
    }
 
@@ -103,11 +112,15 @@ vlVdpDecoderCreate(VdpDevice device,
    return VDP_STATUS_OK;
 
 error_handle:
-error_buffer:
+error_create_buffers:
 
-   for (i = 0; i < VL_NUM_DECODE_BUFFERS; ++i)
-      if (vldecoder->buffer[i])
-         vldecoder->decoder->destroy_buffer(vldecoder->decoder, vldecoder->buffer[i]);
+   for (i = 0; i < vldecoder->num_buffers; ++i)
+      if (vldecoder->buffers[i])
+         vldecoder->decoder->destroy_buffer(vldecoder->decoder, vldecoder->buffers[i]);
+
+   FREE(vldecoder->buffers);
+
+error_alloc_buffers:
 
    vldecoder->decoder->destroy(vldecoder->decoder);
 
@@ -128,9 +141,11 @@ vlVdpDecoderDestroy(VdpDecoder decoder)
    if (!vldecoder)
       return VDP_STATUS_INVALID_HANDLE;
 
-   for (i = 0; i < VL_NUM_DECODE_BUFFERS; ++i)
-      if (vldecoder->buffer[i])
-         vldecoder->decoder->destroy_buffer(vldecoder->decoder, vldecoder->buffer[i]);
+   for (i = 0; i < vldecoder->num_buffers; ++i)
+      if (vldecoder->buffers[i])
+         vldecoder->decoder->destroy_buffer(vldecoder->decoder, vldecoder->buffers[i]);
+
+   FREE(vldecoder->buffers);
 
    vldecoder->decoder->destroy(vldecoder->decoder);
 
@@ -260,9 +275,9 @@ vlVdpDecoderRender(VdpDecoder decoder,
    case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
    case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
       ++vldecoder->cur_buffer;
-      vldecoder->cur_buffer %= VL_NUM_DECODE_BUFFERS;
+      vldecoder->cur_buffer %= vldecoder->num_buffers;
 
-      vldecoder->decoder->set_decode_buffer(vldecoder->decoder, vldecoder->buffer[vldecoder->cur_buffer]);
+      vldecoder->decoder->set_decode_buffer(vldecoder->decoder, vldecoder->buffers[vldecoder->cur_buffer]);
       vldecoder->decoder->set_decode_target(vldecoder->decoder, vlsurf->video_buffer);
 
       return vlVdpDecoderRenderMpeg12(vldecoder->decoder, (VdpPictureInfoMPEG1Or2 *)picture_info,
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index 5c68cd7c47a..5482eff0630 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -46,7 +46,6 @@
 #define TOSTRING(x) QUOTEME(x)
 #define INFORMATION_STRING TOSTRING(INFORMATION)
 #define VL_HANDLES
-#define VL_NUM_DECODE_BUFFERS 4
 
 static inline enum pipe_video_chroma_format
 ChromaToPipe(VdpChromaType vdpau_type)
@@ -256,7 +255,8 @@ typedef struct
 {
    vlVdpDevice *device;
    struct pipe_video_decoder *decoder;
-   void *buffer[VL_NUM_DECODE_BUFFERS];
+   unsigned num_buffers;
+   void **buffers;
    unsigned cur_buffer;
 } vlVdpDecoder;
 

From d3770d6229d95e9beb67358ae2b2c8824ed3ae58 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <deathsimple@vodafone.de>
Date: Wed, 10 Aug 2011 18:07:01 +0200
Subject: [PATCH 583/600] g3dvl: Rework the decoder interface part 3/5
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Revert back to a macroblock based interface. The structure used
tries to keep as close to the spec as possible.

Signed-off-by: Christian König <deathsimple@vodafone.de>
Reviewed-by: Younes Manton <younes.m@gmail.com>
---
 .../auxiliary/vl/vl_mpeg12_bitstream.c        |  34 +--
 .../auxiliary/vl/vl_mpeg12_bitstream.h        |   9 +-
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c  | 273 +++++++++++++++---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h  |   6 +
 src/gallium/auxiliary/vl/vl_vertex_buffers.c  |  12 +-
 src/gallium/auxiliary/vl/vl_vertex_buffers.h  |  51 +++-
 src/gallium/include/pipe/p_video_decoder.h    |  26 +-
 src/gallium/include/pipe/p_video_state.h      | 111 ++++---
 src/gallium/state_trackers/vdpau/decode.c     |   5 +-
 .../state_trackers/xorg/xvmc/surface.c        | 263 ++++-------------
 .../state_trackers/xorg/xvmc/xvmc_private.h   |  12 +-
 11 files changed, 439 insertions(+), 363 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index 9dd032e911d..61ffcd1b7b2 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -1197,7 +1197,7 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur
 
    bs->ycbcr_stream[cc]->x = x;
    bs->ycbcr_stream[cc]->y = y;
-   bs->ycbcr_stream[cc]->intra = PIPE_MPEG12_DCT_INTRA;
+   bs->ycbcr_stream[cc]->intra = 1;
    bs->ycbcr_stream[cc]->coding = coding;
 
    vl_vlc_needbits(&bs->vlc);
@@ -1233,7 +1233,7 @@ slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
 
    bs->ycbcr_stream[cc]->x = x;
    bs->ycbcr_stream[cc]->y = y;
-   bs->ycbcr_stream[cc]->intra = PIPE_MPEG12_DCT_DELTA;
+   bs->ycbcr_stream[cc]->intra = 0;
    bs->ycbcr_stream[cc]->coding = coding;
 
    memset(dest, 0, sizeof(int16_t) * 64);
@@ -1250,7 +1250,7 @@ slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
 }
 
 static INLINE void
-motion_mp1(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+motion_mp1(struct vl_mpg12_bs *bs, unsigned f_code[2], struct vl_motionvector *mv)
 {
    int motion_x, motion_y;
 
@@ -1268,7 +1268,7 @@ motion_mp1(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector
 }
 
 static INLINE void
-motion_fr_frame(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+motion_fr_frame(struct vl_mpg12_bs *bs, unsigned f_code[2], struct vl_motionvector *mv)
 {
    int motion_x, motion_y;
 
@@ -1286,7 +1286,7 @@ motion_fr_frame(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionve
 }
 
 static INLINE void
-motion_fr_field(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+motion_fr_field(struct vl_mpg12_bs *bs, unsigned f_code[2], struct vl_motionvector *mv)
 {
    int motion_x, motion_y;
 
@@ -1320,7 +1320,7 @@ motion_fr_field(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionve
 }
 
 static INLINE void
-motion_fr_dmv(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+motion_fr_dmv(struct vl_mpg12_bs *bs, unsigned f_code[2], struct vl_motionvector *mv)
 {
    int motion_x, motion_y;
 
@@ -1340,7 +1340,7 @@ motion_fr_dmv(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvect
 
 /* like motion_frame, but parsing without actual motion compensation */
 static INLINE void
-motion_fr_conceal(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+motion_fr_conceal(struct vl_mpg12_bs *bs, unsigned f_code[2], struct vl_motionvector *mv)
 {
    int tmp;
 
@@ -1360,7 +1360,7 @@ motion_fr_conceal(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motion
 }
 
 static INLINE void
-motion_fi_field(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+motion_fi_field(struct vl_mpg12_bs *bs, unsigned f_code[2], struct vl_motionvector *mv)
 {
    int motion_x, motion_y;
 
@@ -1384,7 +1384,7 @@ motion_fi_field(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionve
 }
 
 static INLINE void
-motion_fi_16x8(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+motion_fi_16x8(struct vl_mpg12_bs *bs, unsigned f_code[2], struct vl_motionvector *mv)
 {
    int motion_x, motion_y;
 
@@ -1425,7 +1425,7 @@ motion_fi_16x8(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvec
 }
 
 static INLINE void
-motion_fi_dmv(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+motion_fi_dmv(struct vl_mpg12_bs *bs, unsigned f_code[2], struct vl_motionvector *mv)
 {
    int motion_x, motion_y;
 
@@ -1445,7 +1445,7 @@ motion_fi_dmv(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvect
 
 
 static INLINE void
-motion_fi_conceal(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+motion_fi_conceal(struct vl_mpg12_bs *bs, unsigned f_code[2], struct vl_motionvector *mv)
 {
    int tmp;
 
@@ -1474,8 +1474,8 @@ do {							\
 
 static INLINE void
 store_motionvectors(struct vl_mpg12_bs *bs, unsigned *mv_pos,
-                    struct pipe_motionvector *mv_fwd,
-                    struct pipe_motionvector *mv_bwd)
+                    struct vl_motionvector *mv_fwd,
+                    struct vl_motionvector *mv_bwd)
 {
    bs->mv_stream[0][*mv_pos].top = mv_fwd->top;
    bs->mv_stream[0][*mv_pos].bottom =
@@ -1554,8 +1554,8 @@ slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
 static INLINE bool
 decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
 {
-   enum pipe_video_field_select default_field_select;
-   struct pipe_motionvector mv_fwd, mv_bwd;
+   enum vl_field_select default_field_select;
+   struct vl_motionvector mv_fwd, mv_bwd;
    enum pipe_mpeg12_dct_type dct_type;
 
    /* predictor for DC coefficients in intra blocks */
@@ -1787,8 +1787,8 @@ vl_mpg12_bs_init(struct vl_mpg12_bs *bs, unsigned width, unsigned height)
 }
 
 void
-vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES],
-                        short *ycbcr_buffer[VL_MAX_PLANES], struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES])
+vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct vl_ycbcr_block *ycbcr_stream[VL_MAX_PLANES],
+                        short *ycbcr_buffer[VL_MAX_PLANES], struct vl_motionvector *mv_stream[VL_MAX_REF_FRAMES])
 {
    unsigned i;
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h
index 4e48a9faa2f..8a35dc49d2d 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h
@@ -30,6 +30,7 @@
 
 #include "vl_defines.h"
 #include "vl_vlc.h"
+#include "vl_vertex_buffers.h"
 
 struct vl_mpg12_bs
 {
@@ -39,18 +40,18 @@ struct vl_mpg12_bs
 
    unsigned *num_ycbcr_blocks;
 
-   struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES];
+   struct vl_ycbcr_block *ycbcr_stream[VL_MAX_PLANES];
    short *ycbcr_buffer[VL_MAX_PLANES];
 
-   struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES];
+   struct vl_motionvector *mv_stream[VL_MAX_REF_FRAMES];
 };
 
 void
 vl_mpg12_bs_init(struct vl_mpg12_bs *bs, unsigned width, unsigned height);
 
 void
-vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES],
-                        short *ycbcr_buffer[VL_MAX_PLANES], struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES]);
+vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct vl_ycbcr_block *ycbcr_stream[VL_MAX_PLANES],
+                        short *ycbcr_buffer[VL_MAX_PLANES], struct vl_motionvector *mv_stream[VL_MAX_REF_FRAMES]);
 
 void
 vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer,
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 228a386ce4a..805a2215133 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -75,6 +75,12 @@ static const struct format_config mc_format_config[] = {
 static const unsigned num_mc_format_configs =
    sizeof(mc_format_config) / sizeof(struct format_config);
 
+static const unsigned const_empty_block_mask_420[3][2][2] = {
+   { { 0x20, 0x10 },  { 0x08, 0x04 } },
+   { { 0x02, 0x02 },  { 0x02, 0x02 } },
+   { { 0x01, 0x01 },  { 0x01, 0x01 } }
+};
+
 static bool
 init_zscan_buffer(struct vl_mpeg12_decoder *dec, struct vl_mpeg12_buffer *buffer)
 {
@@ -224,6 +230,155 @@ cleanup_mc_buffer(struct vl_mpeg12_buffer *buf)
       vl_mc_cleanup_buffer(&buf->mc[i]);
 }
 
+static inline void
+MacroBlockTypeToPipeWeights(const struct pipe_mpeg12_macroblock *mb, unsigned weights[2])
+{
+   assert(mb);
+
+   switch (mb->macroblock_type & (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD)) {
+   case PIPE_MPEG12_MB_TYPE_MOTION_FORWARD:
+      weights[0] = PIPE_VIDEO_MV_WEIGHT_MAX;
+      weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN;
+      break;
+
+   case (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD):
+      weights[0] = PIPE_VIDEO_MV_WEIGHT_HALF;
+      weights[1] = PIPE_VIDEO_MV_WEIGHT_HALF;
+      break;
+
+   case PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD:
+      weights[0] = PIPE_VIDEO_MV_WEIGHT_MIN;
+      weights[1] = PIPE_VIDEO_MV_WEIGHT_MAX;
+      break;
+
+   default:
+      if (mb->macroblock_type & PIPE_MPEG12_MB_TYPE_PATTERN) {
+         /* patern without a motion vector, just copy the old frame content */
+         weights[0] = PIPE_VIDEO_MV_WEIGHT_MAX;
+         weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN;
+      } else {
+         weights[0] = PIPE_VIDEO_MV_WEIGHT_MIN;
+         weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN;
+      }
+      break;
+   }
+}
+
+static inline struct vl_motionvector
+MotionVectorToPipe(const struct pipe_mpeg12_macroblock *mb, unsigned vector,
+                   unsigned field_select_mask, unsigned weight)
+{
+   struct vl_motionvector mv;
+
+   assert(mb);
+
+   if (mb->macroblock_type & (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD)) {
+      switch (mb->macroblock_modes.bits.frame_motion_type) {
+      case PIPE_MPEG12_MO_TYPE_FRAME:
+         mv.top.x = mb->PMV[0][vector][0];
+         mv.top.y = mb->PMV[0][vector][1];
+         mv.top.field_select = PIPE_VIDEO_FRAME;
+         mv.top.weight = weight;
+
+         mv.bottom.x = mb->PMV[0][vector][0];
+         mv.bottom.y = mb->PMV[0][vector][1];
+         mv.bottom.weight = weight;
+         mv.bottom.field_select = PIPE_VIDEO_FRAME;
+         break;
+
+      case PIPE_MPEG12_MO_TYPE_FIELD:
+         mv.top.x = mb->PMV[0][vector][0];
+         mv.top.y = mb->PMV[0][vector][1];
+         mv.top.field_select = (mb->motion_vertical_field_select & field_select_mask) ?
+            PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD;
+         mv.top.weight = weight;
+
+         mv.bottom.x = mb->PMV[1][vector][0];
+         mv.bottom.y = mb->PMV[1][vector][1];
+         mv.bottom.field_select = (mb->motion_vertical_field_select & (field_select_mask << 2)) ?
+            PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD;
+         mv.bottom.weight = weight;
+         break;
+
+      default: // TODO: Support DUALPRIME and 16x8
+         break;
+      }
+   } else {
+      mv.top.x = mv.top.y = 0;
+      mv.top.field_select = PIPE_VIDEO_FRAME;
+      mv.top.weight = weight;
+
+      mv.bottom.x = mv.bottom.y = 0;
+      mv.bottom.field_select = PIPE_VIDEO_FRAME;
+      mv.bottom.weight = weight;
+   }
+   return mv;
+}
+
+static inline void
+UploadYcbcrBlocks(struct vl_mpeg12_decoder *dec,
+                  struct vl_mpeg12_buffer *buf,
+                  const struct pipe_mpeg12_macroblock *mb)
+{
+   unsigned intra;
+   unsigned tb, x, y, luma_blocks;
+   short *blocks;
+
+   assert(dec && buf);
+   assert(mb);
+
+   if (!mb->coded_block_pattern)
+      return;
+
+   blocks = mb->blocks;
+   intra = mb->macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA ? 1 : 0;
+
+   for (y = 0, luma_blocks = 0; y < 2; ++y) {
+      for (x = 0; x < 2; ++x, ++tb) {
+         if (mb->coded_block_pattern & const_empty_block_mask_420[0][y][x]) {
+
+            struct vl_ycbcr_block *stream = buf->ycbcr_stream[0];
+            stream->x = mb->x * 2 + x;
+            stream->y = mb->y * 2 + y;
+            stream->intra = intra;
+            stream->coding = mb->macroblock_modes.bits.dct_type;
+
+            buf->num_ycbcr_blocks[0]++;
+            buf->ycbcr_stream[0]++;
+
+            luma_blocks++;
+         }
+      }
+   }
+
+   if (luma_blocks > 0) {
+      memcpy(buf->texels[0], blocks, 64 * sizeof(short) * luma_blocks);
+      buf->texels[0] += 64 * luma_blocks;
+      blocks += 64 * luma_blocks;
+   }
+
+   /* TODO: Implement 422, 444 */
+   //assert(ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
+
+   for (tb = 1; tb < 3; ++tb) {
+      if (mb->coded_block_pattern & const_empty_block_mask_420[tb][0][0]) {
+
+         struct vl_ycbcr_block *stream = buf->ycbcr_stream[tb];
+         stream->x = mb->x;
+         stream->y = mb->y;
+         stream->intra = intra;
+         stream->coding = 0;
+
+         buf->num_ycbcr_blocks[tb]++;
+         buf->ycbcr_stream[tb]++;
+
+         memcpy(buf->texels[tb], blocks, 64 * sizeof(short));
+         buf->texels[tb] += 64;
+         blocks += 64;
+      }
+   }
+}
+
 static void
 vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
 {
@@ -450,19 +605,19 @@ vl_mpeg12_begin_frame(struct pipe_video_decoder *decoder)
       );
 
       buf->texels[i] = dec->base.context->transfer_map(dec->base.context, buf->tex_transfer[i]);
+
+      buf->num_ycbcr_blocks[i] = 0;
    }
 
+   for (i = 0; i < VL_MAX_PLANES; ++i)
+      buf->ycbcr_stream[i] = vl_vb_get_ycbcr_stream(&buf->vertex_stream, i);
+
+   for (i = 0; i < VL_MAX_REF_FRAMES; ++i)
+      buf->mv_stream[i] = vl_vb_get_mv_stream(&buf->vertex_stream, i);
+
    if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
-      struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES];
-      struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES];
+      vl_mpg12_bs_set_buffers(&buf->bs, buf->ycbcr_stream, buf->texels, buf->mv_stream);
 
-      for (i = 0; i < VL_MAX_PLANES; ++i)
-         ycbcr_stream[i] = vl_vb_get_ycbcr_stream(&buf->vertex_stream, i);
-
-      for (i = 0; i < VL_MAX_REF_FRAMES; ++i)
-         mv_stream[i] = vl_vb_get_mv_stream(&buf->vertex_stream, i);
-
-      vl_mpg12_bs_set_buffers(&buf->bs, ycbcr_stream, buf->texels, mv_stream);
    } else {
 
       for (i = 0; i < VL_MAX_PLANES; ++i)
@@ -470,52 +625,76 @@ vl_mpeg12_begin_frame(struct pipe_video_decoder *decoder)
    }
 }
 
-static struct pipe_ycbcr_block *
-vl_mpeg12_get_ycbcr_stream(struct pipe_video_decoder *decoder, int component)
+static void
+vl_mpeg12_decode_macroblock(struct pipe_video_decoder *decoder,
+                            const struct pipe_macroblock *macroblocks,
+                            unsigned num_macroblocks)
 {
    struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
+   const struct pipe_mpeg12_macroblock *mb = (const struct pipe_mpeg12_macroblock *)macroblocks;
+   struct vl_mpeg12_buffer *buf;
+
+   unsigned i, j, mv_weights[2];
 
    assert(dec && dec->current_buffer);
-   assert(component < VL_MAX_PLANES);
+   assert(macroblocks && macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12);
 
-   return vl_vb_get_ycbcr_stream(&dec->current_buffer->vertex_stream, component);
-}
+   buf = dec->current_buffer;
+   assert(buf);
 
-static short *
-vl_mpeg12_get_ycbcr_buffer(struct pipe_video_decoder *decoder, int component)
-{
-   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
+   for (; num_macroblocks > 0; --num_macroblocks) {
+      unsigned mb_addr = mb->y * dec->width_in_macroblocks + mb->x;
 
-   assert(dec && dec->current_buffer);
-   assert(component < VL_MAX_PLANES);
+      if (mb->macroblock_type & (PIPE_MPEG12_MB_TYPE_PATTERN | PIPE_MPEG12_MB_TYPE_INTRA))
+         UploadYcbcrBlocks(dec, buf, mb);
 
-   return dec->current_buffer->texels[component];
-}
+      MacroBlockTypeToPipeWeights(mb, mv_weights);
 
-static unsigned
-vl_mpeg12_get_mv_stream_stride(struct pipe_video_decoder *decoder)
-{
-   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
+      for (i = 0; i < 2; ++i) {
+          if (!dec->ref_frames[i][0]) continue;
 
-   assert(dec && dec->current_buffer);
+         buf->mv_stream[i][mb_addr] = MotionVectorToPipe
+         (
+            mb, i,
+            i ? PIPE_MPEG12_FS_FIRST_BACKWARD : PIPE_MPEG12_FS_FIRST_FORWARD,
+            mv_weights[i]
+         );
+      }
 
-   return vl_vb_get_mv_stream_stride(&dec->current_buffer->vertex_stream);
-}
+      /* see section 7.6.6 of the spec */
+      if (mb->num_skipped_macroblocks > 0) {
+         struct vl_motionvector skipped_mv[2];
 
-static struct pipe_motionvector *
-vl_mpeg12_get_mv_stream(struct pipe_video_decoder *decoder, int ref_frame)
-{
-   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
+         if (dec->ref_frames[0][0] && !dec->ref_frames[1][0]) {
+            skipped_mv[0].top.x = skipped_mv[0].top.y = 0;
+            skipped_mv[0].top.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
+         } else {
+           skipped_mv[0] = buf->mv_stream[0][mb_addr];
+           skipped_mv[1] = buf->mv_stream[1][mb_addr];
+         }
+         skipped_mv[0].top.field_select = PIPE_VIDEO_FRAME;
+         skipped_mv[1].top.field_select = PIPE_VIDEO_FRAME;
 
-   assert(dec && dec->current_buffer);
+         skipped_mv[0].bottom = skipped_mv[0].top;
+         skipped_mv[1].bottom = skipped_mv[1].top;
 
-   return vl_vb_get_mv_stream(&dec->current_buffer->vertex_stream, ref_frame);
+         ++mb_addr;
+         for (i = 0; i < mb->num_skipped_macroblocks; ++i, ++mb_addr) {
+            for (j = 0; j < 2; ++j) {
+               if (!dec->ref_frames[j][0]) continue;
+               buf->mv_stream[j][mb_addr] = skipped_mv[j];
+
+            }
+         }
+      }
+
+      ++mb;
+   }
 }
 
 static void
 vl_mpeg12_decode_bitstream(struct pipe_video_decoder *decoder,
-                           unsigned num_bytes, const void *data,
-                           unsigned num_ycbcr_blocks[3])
+                           unsigned num_bytes, const void *data)
 {
    struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
    struct vl_mpeg12_buffer *buf;
@@ -531,11 +710,11 @@ vl_mpeg12_decode_bitstream(struct pipe_video_decoder *decoder,
       vl_zscan_set_layout(&buf->zscan[i], dec->picture_desc.alternate_scan ?
                           dec->zscan_alternate : dec->zscan_normal);
 
-   vl_mpg12_bs_decode(&buf->bs, num_bytes, data, &dec->picture_desc, num_ycbcr_blocks);
+   vl_mpg12_bs_decode(&buf->bs, num_bytes, data, &dec->picture_desc, buf->num_ycbcr_blocks);
 }
 
 static void
-vl_mpeg12_end_frame(struct pipe_video_decoder *decoder, unsigned num_ycbcr_blocks[3])
+vl_mpeg12_end_frame(struct pipe_video_decoder *decoder)
 {
    struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
    struct pipe_sampler_view **mc_source_sv;
@@ -579,15 +758,15 @@ vl_mpeg12_end_frame(struct pipe_video_decoder *decoder, unsigned num_ycbcr_block
 
    dec->base.context->bind_vertex_elements_state(dec->base.context, dec->ves_ycbcr);
    for (i = 0; i < VL_MAX_PLANES; ++i) {
-      if (!num_ycbcr_blocks[i]) continue;
+      if (!buf->num_ycbcr_blocks[i]) continue;
 
       vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, i);
       dec->base.context->set_vertex_buffers(dec->base.context, 3, vb);
 
-      vl_zscan_render(&buf->zscan[i] , num_ycbcr_blocks[i]);
+      vl_zscan_render(&buf->zscan[i] , buf->num_ycbcr_blocks[i]);
 
       if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
-         vl_idct_flush(&buf->idct[i], num_ycbcr_blocks[i]);
+         vl_idct_flush(&buf->idct[i], buf->num_ycbcr_blocks[i]);
    }
 
    mc_source_sv = dec->mc_source->get_sampler_view_planes(dec->mc_source);
@@ -596,7 +775,7 @@ vl_mpeg12_end_frame(struct pipe_video_decoder *decoder, unsigned num_ycbcr_block
 
       nr_components = util_format_get_nr_components(dec->target_surfaces[i]->texture->format);
       for (j = 0; j < nr_components; ++j, ++component) {
-         if (!num_ycbcr_blocks[i]) continue;
+         if (!buf->num_ycbcr_blocks[i]) continue;
 
          vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, component);
          dec->base.context->set_vertex_buffers(dec->base.context, 3, vb);
@@ -607,7 +786,7 @@ vl_mpeg12_end_frame(struct pipe_video_decoder *decoder, unsigned num_ycbcr_block
             dec->base.context->set_fragment_sampler_views(dec->base.context, 1, &mc_source_sv[component]);
             dec->base.context->bind_fragment_sampler_states(dec->base.context, 1, &dec->sampler_ycbcr);
          }
-         vl_mc_render_ycbcr(&buf->mc[i], j, num_ycbcr_blocks[component]);
+         vl_mc_render_ycbcr(&buf->mc[i], j, buf->num_ycbcr_blocks[component]);
       }
    }
 }
@@ -893,16 +1072,14 @@ vl_create_mpeg12_decoder(struct pipe_context *context,
    dec->base.set_decode_target = vl_mpeg12_set_decode_target;
    dec->base.set_reference_frames = vl_mpeg12_set_reference_frames;
    dec->base.begin_frame = vl_mpeg12_begin_frame;
-   dec->base.get_ycbcr_stream = vl_mpeg12_get_ycbcr_stream;
-   dec->base.get_ycbcr_buffer = vl_mpeg12_get_ycbcr_buffer;
-   dec->base.get_mv_stream_stride = vl_mpeg12_get_mv_stream_stride;
-   dec->base.get_mv_stream = vl_mpeg12_get_mv_stream;
+   dec->base.decode_macroblock = vl_mpeg12_decode_macroblock;
    dec->base.decode_bitstream = vl_mpeg12_decode_bitstream;
    dec->base.end_frame = vl_mpeg12_end_frame;
    dec->base.flush = vl_mpeg12_flush;
 
    dec->blocks_per_line = MAX2(util_next_power_of_two(dec->base.width) / block_size_pixels, 4);
    dec->num_blocks = (dec->base.width * dec->base.height) / block_size_pixels;
+   dec->width_in_macroblocks = align(dec->base.width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
 
    dec->quads = vl_vb_upload_quads(dec->base.context);
    dec->pos = vl_vb_upload_pos(
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index 85c84fc1c4a..277f5b90e4b 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -49,6 +49,7 @@ struct vl_mpeg12_decoder
 
    unsigned blocks_per_line;
    unsigned num_blocks;
+   unsigned width_in_macroblocks;
 
    enum pipe_format zscan_source_format;
 
@@ -86,6 +87,8 @@ struct vl_mpeg12_buffer
 {
    struct vl_vertex_buffer vertex_stream;
 
+   unsigned num_ycbcr_blocks[3];
+
    struct pipe_video_buffer *zscan_source;
 
    struct vl_mpg12_bs bs;
@@ -95,6 +98,9 @@ struct vl_mpeg12_buffer
 
    struct pipe_transfer *tex_transfer[VL_MAX_PLANES];
    short *texels[VL_MAX_PLANES];
+
+   struct vl_ycbcr_block *ycbcr_stream[VL_MAX_PLANES];
+   struct vl_motionvector *mv_stream[VL_MAX_REF_FRAMES];
 };
 
 /**
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index c0f1449bf80..b7aa14bb4db 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -266,7 +266,7 @@ vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe,
          pipe->screen,
          PIPE_BIND_VERTEX_BUFFER,
          PIPE_USAGE_STREAM,
-         sizeof(struct pipe_ycbcr_block) * size * 4
+         sizeof(struct vl_ycbcr_block) * size * 4
       );
       if (!buffer->ycbcr[i].resource)
          goto error_ycbcr;
@@ -278,7 +278,7 @@ vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe,
          pipe->screen,
          PIPE_BIND_VERTEX_BUFFER,
          PIPE_USAGE_STREAM,
-         sizeof(struct pipe_motionvector) * size
+         sizeof(struct vl_motionvector) * size
       );
       if (!buffer->mv[i].resource)
          goto error_mv;
@@ -310,7 +310,7 @@ vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component)
 
    assert(buffer);
 
-   buf.stride = sizeof(struct pipe_ycbcr_block);
+   buf.stride = sizeof(struct vl_ycbcr_block);
    buf.buffer_offset = 0;
    buf.buffer = buffer->ycbcr[component].resource;
 
@@ -324,7 +324,7 @@ vl_vb_get_mv(struct vl_vertex_buffer *buffer, int motionvector)
 
    assert(buffer);
 
-   buf.stride = sizeof(struct pipe_motionvector);
+   buf.stride = sizeof(struct vl_motionvector);
    buf.buffer_offset = 0;
    buf.buffer = buffer->mv[motionvector].resource;
 
@@ -360,7 +360,7 @@ vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe)
 
 }
 
-struct pipe_ycbcr_block *
+struct vl_ycbcr_block *
 vl_vb_get_ycbcr_stream(struct vl_vertex_buffer *buffer, int component)
 {
    assert(buffer);
@@ -377,7 +377,7 @@ vl_vb_get_mv_stream_stride(struct vl_vertex_buffer *buffer)
    return buffer->width;
 }
 
-struct pipe_motionvector *
+struct vl_motionvector *
 vl_vb_get_mv_stream(struct vl_vertex_buffer *buffer, int ref_frame)
 {
    assert(buffer);
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index 74845a42b69..38db899916d 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -52,20 +52,55 @@ enum VS_INPUT
    NUM_VS_INPUTS = 4
 };
 
+enum vl_mv_weight
+{
+   PIPE_VIDEO_MV_WEIGHT_MIN = 0,
+   PIPE_VIDEO_MV_WEIGHT_HALF = 128,
+   PIPE_VIDEO_MV_WEIGHT_MAX = 256
+};
+
+enum vl_field_select
+{
+   PIPE_VIDEO_FRAME = 0,
+   PIPE_VIDEO_TOP_FIELD = 1,
+   PIPE_VIDEO_BOTTOM_FIELD = 3,
+
+   /* TODO
+   PIPE_VIDEO_DUALPRIME
+   PIPE_VIDEO_16x8
+   */
+};
+
+struct vl_motionvector
+{
+   struct {
+      int16_t x, y;
+      int16_t field_select; /**< enum pipe_video_field_select */
+      int16_t weight;  /**< enum pipe_video_mv_weight  */
+   } top, bottom;
+};
+
+struct vl_ycbcr_block
+{
+   uint8_t x, y;
+   uint8_t intra;
+   uint8_t coding;
+};
+
 struct vl_vertex_buffer
 {
    unsigned width, height;
 
    struct {
-      struct pipe_resource    *resource;
-      struct pipe_transfer    *transfer;
-      struct pipe_ycbcr_block *vertex_stream;
+      struct pipe_resource  *resource;
+      struct pipe_transfer  *transfer;
+      struct vl_ycbcr_block *vertex_stream;
    } ycbcr[VL_MAX_PLANES];
 
    struct {
-      struct pipe_resource     *resource;
-      struct pipe_transfer     *transfer;
-      struct pipe_motionvector *vertex_stream;
+      struct pipe_resource   *resource;
+      struct pipe_transfer   *transfer;
+      struct vl_motionvector *vertex_stream;
    } mv[VL_MAX_REF_FRAMES];
 };
 
@@ -89,13 +124,13 @@ void vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
 
 struct pipe_vertex_buffer vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component);
 
-struct pipe_ycbcr_block *vl_vb_get_ycbcr_stream(struct vl_vertex_buffer *buffer, int component);
+struct vl_ycbcr_block *vl_vb_get_ycbcr_stream(struct vl_vertex_buffer *buffer, int component);
 
 struct pipe_vertex_buffer vl_vb_get_mv(struct vl_vertex_buffer *buffer, int ref_frame);
 
 unsigned vl_vb_get_mv_stream_stride(struct vl_vertex_buffer *buffer);
 
-struct pipe_motionvector *vl_vb_get_mv_stream(struct vl_vertex_buffer *buffer, int ref_frame);
+struct vl_motionvector *vl_vb_get_mv_stream(struct vl_vertex_buffer *buffer, int ref_frame);
 
 void vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
 
diff --git a/src/gallium/include/pipe/p_video_decoder.h b/src/gallium/include/pipe/p_video_decoder.h
index ae071136bac..c7f5877858d 100644
--- a/src/gallium/include/pipe/p_video_decoder.h
+++ b/src/gallium/include/pipe/p_video_decoder.h
@@ -106,36 +106,22 @@ struct pipe_video_decoder
    void (*begin_frame)(struct pipe_video_decoder *decoder);
 
    /**
-    * get the pointer where to put the ycbcr blocks of a component
+    * decode a macroblock
     */
-   struct pipe_ycbcr_block *(*get_ycbcr_stream)(struct pipe_video_decoder *decoder, int component);
-
-   /**
-    * get the pointer where to put the ycbcr dct block data of a component
-    */
-   short *(*get_ycbcr_buffer)(struct pipe_video_decoder *decoder, int component);
-
-   /**
-    * get the stride of the mv buffer
-    */
-   unsigned (*get_mv_stream_stride)(struct pipe_video_decoder *decoder);
-
-   /**
-    * get the pointer where to put the motion vectors of a ref frame
-    */
-   struct pipe_motionvector *(*get_mv_stream)(struct pipe_video_decoder *decoder, int ref_frame);
+   void (*decode_macroblock)(struct pipe_video_decoder *decoder,
+                             const struct pipe_macroblock *macroblocks,
+                             unsigned num_macroblocks);
 
    /**
     * decode a bitstream
     */
    void (*decode_bitstream)(struct pipe_video_decoder *decoder,
-                            unsigned num_bytes, const void *data,
-                            unsigned num_ycbcr_blocks[3]);
+                            unsigned num_bytes, const void *data);
 
    /**
     * end decoding of the current frame
     */
-   void (*end_frame)(struct pipe_video_decoder *decoder, unsigned num_ycbcr_blocks[3]);
+   void (*end_frame)(struct pipe_video_decoder *decoder);
 
    /**
     * flush any outstanding command buffers to the hardware
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index 2a64ffb5601..9463af2fbe5 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -50,53 +50,45 @@ enum pipe_mpeg12_picture_type
    PIPE_MPEG12_PICTURE_TYPE_FRAME
 };
 
-enum pipe_mpeg12_dct_intra
+/*
+ * flags for macroblock_type, see section 6.3.17.1 in the spec
+ */
+enum pipe_mpeg12_macroblock_type
 {
-   PIPE_MPEG12_DCT_DELTA = 0,
-   PIPE_MPEG12_DCT_INTRA = 1
+   PIPE_MPEG12_MB_TYPE_QUANT = 0x01,
+   PIPE_MPEG12_MB_TYPE_MOTION_FORWARD = 0x02,
+   PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD = 0x04,
+   PIPE_MPEG12_MB_TYPE_PATTERN = 0x08,
+   PIPE_MPEG12_MB_TYPE_INTRA = 0x10
 };
 
+/*
+ * flags for motion_type, see table 6-17 and 6-18 in the spec
+ */
+enum pipe_mpeg12_motion_type
+{
+   PIPE_MPEG12_MO_TYPE_RESERVED = 0x00,
+   PIPE_MPEG12_MO_TYPE_FIELD = 0x01,
+   PIPE_MPEG12_MO_TYPE_FRAME = 0x02,
+   PIPE_MPEG12_MO_TYPE_16x8 = 0x02,
+   PIPE_MPEG12_MO_TYPE_DUAL_PRIME = 0x03
+};
+
+/*
+ * see section 6.3.17.1 and table 6-19 in the spec
+ */
 enum pipe_mpeg12_dct_type
 {
    PIPE_MPEG12_DCT_TYPE_FRAME = 0,
    PIPE_MPEG12_DCT_TYPE_FIELD = 1
 };
 
-enum pipe_video_field_select
+enum pipe_mpeg12_field_select
 {
-   PIPE_VIDEO_FRAME = 0,
-   PIPE_VIDEO_TOP_FIELD = 1,
-   PIPE_VIDEO_BOTTOM_FIELD = 3,
-
-   /* TODO
-   PIPE_VIDEO_DUALPRIME
-   PIPE_VIDEO_16x8
-   */
-};
-
-enum pipe_video_mv_weight
-{
-   PIPE_VIDEO_MV_WEIGHT_MIN = 0,
-   PIPE_VIDEO_MV_WEIGHT_HALF = 128,
-   PIPE_VIDEO_MV_WEIGHT_MAX = 256
-};
-
-/* bitfields because this is used as a vertex buffer element */
-struct pipe_motionvector
-{
-   struct {
-      short x, y;
-      ushort field_select; /**< enum pipe_video_field_select */
-      ushort weight;  /**< enum pipe_video_mv_weight  */
-   } top, bottom;
-};
-
-/* bitfields because this is used as a vertex buffer element */
-struct pipe_ycbcr_block
-{
-   ubyte x, y;
-   ubyte intra;  /**< enum pipe_mpeg12_dct_intra */
-   ubyte coding; /**< enum pipe_mpeg12_dct_type */
+   PIPE_MPEG12_FS_FIRST_FORWARD = 0x01,
+   PIPE_MPEG12_FS_FIRST_BACKWARD = 0x02,
+   PIPE_MPEG12_FS_SECOND_FORWARD = 0x04,
+   PIPE_MPEG12_FS_SECOND_BACKWARD = 0x08
 };
 
 struct pipe_picture_desc
@@ -104,6 +96,11 @@ struct pipe_picture_desc
    enum pipe_video_profile profile;
 };
 
+struct pipe_macroblock
+{
+   enum pipe_video_codec codec;
+};
+
 struct pipe_mpeg12_picture_desc
 {
    struct pipe_picture_desc base;
@@ -118,6 +115,46 @@ struct pipe_mpeg12_picture_desc
    unsigned f_code[2][2];
 };
 
+struct pipe_mpeg12_macroblock
+{
+   struct pipe_macroblock base;
+
+   /* see section 6.3.17 in the spec */
+   unsigned short x, y;
+
+   /* see section 6.3.17.1 in the spec */
+   unsigned char macroblock_type;
+
+   union {
+      struct {
+         /* see table 6-17 in the spec */
+         unsigned int frame_motion_type:2;
+
+         /* see table 6-18 in the spec */
+         unsigned int field_motion_type:2;
+
+         /* see table 6-19 in the spec */
+         unsigned int dct_type:1;
+      } bits;
+      unsigned int value;
+   } macroblock_modes;
+
+    /* see section 6.3.17.2 in the spec */
+   unsigned char motion_vertical_field_select;
+
+   /* see Table 7-7 in the spec */
+   short PMV[2][2][2];
+
+   /* see figure 6.10-12 in the spec */
+   unsigned short coded_block_pattern;
+
+   /* see figure 6.10-12 in the spec */
+   short *blocks;
+
+   /* Number of skipped macroblocks after this macroblock */
+   unsigned short num_skipped_macroblocks;
+};
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 3527f733809..5ca40f71efe 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -184,7 +184,6 @@ vlVdpDecoderRenderMpeg12(struct pipe_video_decoder *decoder,
    struct pipe_mpeg12_picture_desc picture;
    struct pipe_video_buffer *ref_frames[2];
    uint8_t intra_quantizer_matrix[64];
-   unsigned num_ycbcr_blocks[3] = { 0, 0, 0 };
    unsigned i;
 
    VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Decoding MPEG2\n");
@@ -232,9 +231,9 @@ vlVdpDecoderRenderMpeg12(struct pipe_video_decoder *decoder,
 
    for (i = 0; i < bitstream_buffer_count; ++i)
       decoder->decode_bitstream(decoder, bitstream_buffers[i].bitstream_bytes,
-                                bitstream_buffers[i].bitstream, num_ycbcr_blocks);
+                                bitstream_buffers[i].bitstream);
 
-   decoder->end_frame(decoder, num_ycbcr_blocks);
+   decoder->end_frame(decoder);
 
    return VDP_STATUS_OK;
 }
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 002c35ae445..a4a27cc5b96 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -42,12 +42,6 @@
 
 #include "xvmc_private.h"
 
-static const unsigned const_empty_block_mask_420[3][2][2] = {
-   { { 0x20, 0x10 },  { 0x08, 0x04 } },
-   { { 0x02, 0x02 },  { 0x02, 0x02 } },
-   { { 0x01, 0x01 },  { 0x01, 0x01 } }
-};
-
 static enum pipe_mpeg12_picture_type PictureToPipe(int xvmc_pic)
 {
    switch (xvmc_pic) {
@@ -66,188 +60,57 @@ static enum pipe_mpeg12_picture_type PictureToPipe(int xvmc_pic)
    return -1;
 }
 
-static inline void
-MacroBlockTypeToPipeWeights(const XvMCMacroBlock *xvmc_mb, unsigned weights[2])
-{
-   assert(xvmc_mb);
-
-   switch (xvmc_mb->macroblock_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) {
-   case XVMC_MB_TYPE_MOTION_FORWARD:
-      weights[0] = PIPE_VIDEO_MV_WEIGHT_MAX;
-      weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN;
-      break;
-
-   case (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD):
-      weights[0] = PIPE_VIDEO_MV_WEIGHT_HALF;
-      weights[1] = PIPE_VIDEO_MV_WEIGHT_HALF;
-      break;
-
-   case XVMC_MB_TYPE_MOTION_BACKWARD:
-      weights[0] = PIPE_VIDEO_MV_WEIGHT_MIN;
-      weights[1] = PIPE_VIDEO_MV_WEIGHT_MAX;
-      break;
-
-   default:
-      /* workaround for xines xxmc video out plugin */
-      if (!(xvmc_mb->macroblock_type & ~XVMC_MB_TYPE_PATTERN)) {
-         weights[0] = PIPE_VIDEO_MV_WEIGHT_MAX;
-         weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN;
-      } else {
-         weights[0] = PIPE_VIDEO_MV_WEIGHT_MIN;
-         weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN;
-      }
-      break;
-   }
-}
-
-static inline struct pipe_motionvector
-MotionVectorToPipe(const XvMCMacroBlock *xvmc_mb, unsigned vector,
-                   unsigned field_select_mask, unsigned weight)
-{
-   struct pipe_motionvector mv;
-
-   assert(xvmc_mb);
-
-   switch (xvmc_mb->motion_type) {
-   case XVMC_PREDICTION_FRAME:
-      mv.top.x = xvmc_mb->PMV[0][vector][0];
-      mv.top.y = xvmc_mb->PMV[0][vector][1];
-      mv.top.field_select = PIPE_VIDEO_FRAME;
-      mv.top.weight = weight;
-
-      mv.bottom.x = xvmc_mb->PMV[0][vector][0];
-      mv.bottom.y = xvmc_mb->PMV[0][vector][1];
-      mv.bottom.weight = weight;
-      mv.bottom.field_select = PIPE_VIDEO_FRAME;
-      break;
-
-   case XVMC_PREDICTION_FIELD:
-      mv.top.x = xvmc_mb->PMV[0][vector][0];
-      mv.top.y = xvmc_mb->PMV[0][vector][1];
-      mv.top.field_select = (xvmc_mb->motion_vertical_field_select & field_select_mask) ?
-         PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD;
-      mv.top.weight = weight;
-
-      mv.bottom.x = xvmc_mb->PMV[1][vector][0];
-      mv.bottom.y = xvmc_mb->PMV[1][vector][1];
-      mv.bottom.field_select = (xvmc_mb->motion_vertical_field_select & (field_select_mask << 2)) ?
-         PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD;
-      mv.bottom.weight = weight;
-      break;
-
-   default: // TODO: Support DUALPRIME and 16x8
-      break;
-   }
-
-   return mv;
-}
-
-static inline void
-UploadYcbcrBlocks(XvMCSurfacePrivate *surface,
-                  const XvMCMacroBlock *xvmc_mb,
-                  const XvMCBlockArray *xvmc_blocks)
-{
-   enum pipe_mpeg12_dct_intra intra;
-   enum pipe_mpeg12_dct_type coding;
-
-   unsigned tb, x, y, luma_blocks;
-   short *blocks;
-
-   assert(surface);
-   assert(xvmc_mb);
-
-   if (!xvmc_mb->coded_block_pattern)
-      return;
-
-   intra = xvmc_mb->macroblock_type & XVMC_MB_TYPE_INTRA ?
-           PIPE_MPEG12_DCT_INTRA : PIPE_MPEG12_DCT_DELTA;
-
-   coding = xvmc_mb->dct_type == XVMC_DCT_TYPE_FIELD ?
-            PIPE_MPEG12_DCT_TYPE_FIELD : PIPE_MPEG12_DCT_TYPE_FRAME;
-
-   blocks = xvmc_blocks->blocks + xvmc_mb->index * BLOCK_SIZE_SAMPLES;
-
-   for (y = 0, luma_blocks = 0; y < 2; ++y) {
-      for (x = 0; x < 2; ++x, ++tb) {
-         if (xvmc_mb->coded_block_pattern & const_empty_block_mask_420[0][y][x]) {
-
-            struct pipe_ycbcr_block *stream = surface->ycbcr[0].stream;
-            stream->x = xvmc_mb->x * 2 + x;
-            stream->y = xvmc_mb->y * 2 + y;
-            stream->intra = intra;
-            stream->coding = coding;
-
-            surface->ycbcr[0].num_blocks_added++;
-            surface->ycbcr[0].stream++;
-
-            luma_blocks++;
-         }
-      }
-   }
-
-   if (luma_blocks > 0) {
-      memcpy(surface->ycbcr[0].buffer, blocks, BLOCK_SIZE_BYTES * luma_blocks);
-      surface->ycbcr[0].buffer += BLOCK_SIZE_SAMPLES * luma_blocks;
-      blocks += BLOCK_SIZE_SAMPLES * luma_blocks;
-   }
-
-   /* TODO: Implement 422, 444 */
-   //assert(ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
-
-   for (tb = 1; tb < 3; ++tb) {
-      if (xvmc_mb->coded_block_pattern & const_empty_block_mask_420[tb][0][0]) {
-
-         struct pipe_ycbcr_block *stream = surface->ycbcr[tb].stream;
-         stream->x = xvmc_mb->x;
-         stream->y = xvmc_mb->y;
-         stream->intra = intra;
-         stream->coding = PIPE_MPEG12_DCT_TYPE_FRAME;
-
-         memcpy(surface->ycbcr[tb].buffer, blocks, BLOCK_SIZE_BYTES);
-
-         surface->ycbcr[tb].num_blocks_added++;
-         surface->ycbcr[tb].stream++;
-         surface->ycbcr[tb].buffer += BLOCK_SIZE_SAMPLES;
-         blocks += BLOCK_SIZE_SAMPLES;
-      }
-   }
-
-}
-
 static void
-MacroBlocksToPipe(XvMCSurfacePrivate *surface,
+MacroBlocksToPipe(XvMCContextPrivate *context,
+                  XvMCSurfacePrivate *surface,
                   unsigned int xvmc_picture_structure,
                   const XvMCMacroBlock *xvmc_mb,
                   const XvMCBlockArray *xvmc_blocks,
+                  struct pipe_mpeg12_macroblock *mb,
                   unsigned int num_macroblocks)
 {
-   unsigned int i, j;
+   unsigned int i, j, k;
 
    assert(xvmc_mb);
    assert(xvmc_blocks);
    assert(num_macroblocks);
 
-   for (i = 0; i < num_macroblocks; ++i) {
-      unsigned mv_pos = xvmc_mb->x + surface->mv_stride * xvmc_mb->y;
-      unsigned mv_weights[2];
+   for (; num_macroblocks > 0; --num_macroblocks) {
+      mb->base.codec = PIPE_VIDEO_CODEC_MPEG12;
+      mb->x = xvmc_mb->x;
+      mb->y = xvmc_mb->y;
+      mb->macroblock_type = xvmc_mb->macroblock_type;
 
-      if (xvmc_mb->macroblock_type & (XVMC_MB_TYPE_PATTERN | XVMC_MB_TYPE_INTRA))
-         UploadYcbcrBlocks(surface, xvmc_mb, xvmc_blocks);
+      switch (xvmc_picture_structure) {
+      case XVMC_FRAME_PICTURE:
+         mb->macroblock_modes.bits.frame_motion_type = xvmc_mb->motion_type;
+         mb->macroblock_modes.bits.field_motion_type = 0;
+         break;
 
-      MacroBlockTypeToPipeWeights(xvmc_mb, mv_weights);
+      case XVMC_TOP_FIELD:
+      case XVMC_BOTTOM_FIELD:
+         mb->macroblock_modes.bits.frame_motion_type = 0;
+         mb->macroblock_modes.bits.field_motion_type = xvmc_mb->motion_type;
+         break;
 
-      for (j = 0; j < 2; ++j) {
-         if (!surface->ref[j].mv) continue;
-
-         surface->ref[j].mv[mv_pos] = MotionVectorToPipe
-         (
-            xvmc_mb, j,
-            j ? XVMC_SELECT_FIRST_BACKWARD : XVMC_SELECT_FIRST_FORWARD,
-            mv_weights[j]
-         );
+      default:
+         assert(0);
       }
 
+      mb->macroblock_modes.bits.dct_type = xvmc_mb->dct_type;
+      mb->motion_vertical_field_select = xvmc_mb->motion_vertical_field_select;
+
+      for (i = 0; i < 2; ++i)
+         for (j = 0; j < 2; ++j)
+            for (k = 0; k < 2; ++k)
+               mb->PMV[i][j][k] = xvmc_mb->PMV[i][j][k];
+
+      mb->coded_block_pattern = xvmc_mb->coded_block_pattern;
+      mb->blocks = xvmc_blocks->blocks + xvmc_mb->index * BLOCK_SIZE_SAMPLES;
+      mb->num_skipped_macroblocks = 0;
+
       ++xvmc_mb;
+      ++mb;
    }
 }
 
@@ -270,8 +133,8 @@ SetDecoderStatus(XvMCSurfacePrivate *surface)
    decoder->set_decode_target(decoder, surface->video_buffer);
 
    for (i = 0; i < 2; ++i) {
-      if (surface->ref[i].surface) {
-         XvMCSurfacePrivate *ref = surface->ref[i].surface->privData;
+      if (surface->ref[i]) {
+         XvMCSurfacePrivate *ref = surface->ref[i]->privData;
 
          if (ref)
             ref_frames[num_refs++] = ref->video_buffer;
@@ -284,21 +147,21 @@ static void
 RecursiveEndFrame(XvMCSurfacePrivate *surface)
 {
    XvMCContextPrivate *context_priv;
-   unsigned i, num_ycbcr_blocks[3];
+   unsigned i;
 
    assert(surface);
 
    context_priv = surface->context->privData;
 
    for ( i = 0; i < 2; ++i ) {
-      if (surface->ref[i].surface) {
-         XvMCSurface *ref = surface->ref[i].surface;
+      if (surface->ref[i]) {
+         XvMCSurface *ref = surface->ref[i];
 
          assert(ref);
 
-         surface->ref[i].surface = NULL;
+         surface->ref[i] = NULL;
          RecursiveEndFrame(ref->privData);
-         surface->ref[i].surface = ref;
+         surface->ref[i] = ref;
       }
    }
 
@@ -306,13 +169,10 @@ RecursiveEndFrame(XvMCSurfacePrivate *surface)
       surface->frame_started = 0;
       SetDecoderStatus(surface);
 
-      for (i = 0; i < 3; ++i)
-         num_ycbcr_blocks[i] = surface->ycbcr[i].num_blocks_added;
-
       for (i = 0; i < 2; ++i)
-         surface->ref[i].surface = NULL;
+         surface->ref[i] = NULL;
 
-      context_priv->decoder->end_frame(context_priv->decoder, num_ycbcr_blocks);
+      context_priv->decoder->end_frame(context_priv->decoder);
    }
 }
 
@@ -381,6 +241,7 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
                          XvMCMacroBlockArray *macroblocks, XvMCBlockArray *blocks
 )
 {
+   struct pipe_mpeg12_macroblock mb[num_macroblocks];
    struct pipe_video_decoder *decoder;
 
    XvMCContextPrivate *context_priv;
@@ -389,8 +250,6 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
    XvMCSurfacePrivate *future_surface_priv;
    XvMCMacroBlock *xvmc_mb;
 
-   unsigned i;
-
    XVMC_MSG(XVMC_TRACE, "[XvMC] Rendering to surface %p, with past %p and future %p\n",
             target_surface, past_surface, future_surface);
 
@@ -443,40 +302,28 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
 
    /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
    if (target_surface_priv->frame_started && (
-       target_surface_priv->ref[0].surface != past_surface ||
-       target_surface_priv->ref[1].surface != future_surface ||
+       target_surface_priv->ref[0] != past_surface ||
+       target_surface_priv->ref[1] != future_surface ||
        (xvmc_mb->x == 0 && xvmc_mb->y == 0))) {
 
       // If they change anyway we must assume that the current frame is ended
       RecursiveEndFrame(target_surface_priv);
    }
 
-   target_surface_priv->ref[0].surface = past_surface;
-   target_surface_priv->ref[1].surface = future_surface;
+   target_surface_priv->ref[0] = past_surface;
+   target_surface_priv->ref[1] = future_surface;
 
    SetDecoderStatus(target_surface_priv);
 
    if (!target_surface_priv->frame_started) {
-      decoder->begin_frame(decoder);
-
-      target_surface_priv->mv_stride = decoder->get_mv_stream_stride(decoder);
-      for (i = 0; i < 3; ++i) {
-         target_surface_priv->ycbcr[i].num_blocks_added = 0;
-         target_surface_priv->ycbcr[i].stream = decoder->get_ycbcr_stream(decoder, i);
-         target_surface_priv->ycbcr[i].buffer = decoder->get_ycbcr_buffer(decoder, i);
-      }
-
-      for (i = 0; i < 2; ++i) {
-         if (target_surface_priv->ref[i].surface)
-            target_surface_priv->ref[i].mv = decoder->get_mv_stream(decoder, i);
-         else
-            target_surface_priv->ref[i].mv = NULL;
-      }
-
       target_surface_priv->frame_started = 1;
+      decoder->begin_frame(decoder);
    }
 
-   MacroBlocksToPipe(target_surface_priv, picture_structure, xvmc_mb, blocks, num_macroblocks);
+   MacroBlocksToPipe(context_priv, target_surface_priv, picture_structure,
+                     xvmc_mb, blocks, mb, num_macroblocks);
+
+   context_priv->decoder->decode_macroblock(context_priv->decoder, &mb[0].base, num_macroblocks);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for rendering.\n", target_surface);
 
@@ -665,8 +512,6 @@ Status XvMCDestroySurface(Display *dpy, XvMCSurface *surface)
    XvMCSurfacePrivate *surface_priv;
    XvMCContextPrivate *context_priv;
 
-   unsigned num_ycbcr_buffers[3] = { 0, 0, 0 };
-
    XVMC_MSG(XVMC_TRACE, "[XvMC] Destroying surface %p.\n", surface);
 
    assert(dpy);
@@ -679,7 +524,7 @@ Status XvMCDestroySurface(Display *dpy, XvMCSurface *surface)
    
    if (surface_priv->frame_started) {
       SetDecoderStatus(surface_priv);
-      context_priv->decoder->end_frame(context_priv->decoder, num_ycbcr_buffers);
+      context_priv->decoder->end_frame(context_priv->decoder);
    }
    context_priv->decoder->destroy_buffer(context_priv->decoder, surface_priv->decode_buffer);
    surface_priv->video_buffer->destroy(surface_priv->video_buffer);
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index 5b3debdb78b..fd14ac916ee 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -75,17 +75,7 @@ typedef struct
    // have we allready told the decoder to start a frame
    bool frame_started;
 
-   struct {
-      unsigned num_blocks_added;
-      struct pipe_ycbcr_block *stream;
-      short *buffer;
-   } ycbcr[3];
-
-   unsigned mv_stride;
-   struct {
-      XvMCSurface *surface;
-      struct pipe_motionvector *mv;
-   } ref[2];
+   XvMCSurface *ref[2];
 
    struct pipe_fence_handle *fence;
 

From 835ea8480f656ba4076e30813eb8c85965017266 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <deathsimple@vodafone.de>
Date: Thu, 11 Aug 2011 16:11:36 +0200
Subject: [PATCH 584/600] g3dvl: Rework the decoder interface part 4/5
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make the picture_structure enum spec complient.
Also remove it from the compositor.

Signed-off-by: Christian König <deathsimple@vodafone.de>
Reviewed-by: Younes Manton <younes.m@gmail.com>
---
 src/gallium/auxiliary/vl/vl_compositor.c      |  1 -
 src/gallium/auxiliary/vl/vl_compositor.h      |  1 -
 src/gallium/include/pipe/p_video_state.h      | 12 +++++++----
 src/gallium/state_trackers/vdpau/mixer.c      |  3 +--
 .../state_trackers/vdpau/presentation.c       |  3 +--
 .../state_trackers/xorg/xvmc/surface.c        | 20 +------------------
 6 files changed, 11 insertions(+), 29 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index faca96dc55b..c73f9769446 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -673,7 +673,6 @@ vl_compositor_set_rgba_layer(struct vl_compositor *c,
 
 void
 vl_compositor_render(struct vl_compositor *c,
-                     enum pipe_mpeg12_picture_type picture_type,
                      struct pipe_surface           *dst_surface,
                      struct pipe_video_rect        *dst_area,
                      struct pipe_video_rect        *dst_clip)
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 0a9a7411a61..207510092a0 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -156,7 +156,6 @@ vl_compositor_set_rgba_layer(struct vl_compositor *compositor,
  */
 void
 vl_compositor_render(struct vl_compositor          *compositor,
-                     enum pipe_mpeg12_picture_type picture_type,
                      struct pipe_surface           *dst_surface,
                      struct pipe_video_rect        *dst_area,
                      struct pipe_video_rect        *dst_clip);
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index 9463af2fbe5..8e68f27cbdb 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -43,11 +43,15 @@ struct pipe_video_rect
    unsigned x, y, w, h;
 };
 
-enum pipe_mpeg12_picture_type
+/*
+ * see table 6-14 in the spec
+ */
+enum pipe_mpeg12_picture_structure
 {
-   PIPE_MPEG12_PICTURE_TYPE_FIELD_TOP,
-   PIPE_MPEG12_PICTURE_TYPE_FIELD_BOTTOM,
-   PIPE_MPEG12_PICTURE_TYPE_FRAME
+   PIPE_MPEG12_PICTURE_STRUCTURE_RESERVED = 0x00,
+   PIPE_MPEG12_PICTURE_STRUCTURE_FIELD_TOP = 0x01,
+   PIPE_MPEG12_PICTURE_STRUCTURE_FIELD_BOTTOM = 0x02,
+   PIPE_MPEG12_PICTURE_STRUCTURE_FRAME = 0x03
 };
 
 /*
diff --git a/src/gallium/state_trackers/vdpau/mixer.c b/src/gallium/state_trackers/vdpau/mixer.c
index d5187006bfc..fbd24a29414 100644
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -157,8 +157,7 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer,
 
    vl_compositor_clear_layers(&vmixer->compositor);
    vl_compositor_set_buffer_layer(&vmixer->compositor, 0, surf->video_buffer, NULL, NULL);
-   vl_compositor_render(&vmixer->compositor, PIPE_MPEG12_PICTURE_TYPE_FRAME,
-                        dst->surface, NULL, NULL);
+   vl_compositor_render(&vmixer->compositor, dst->surface, NULL, NULL);
 
    return VDP_STATUS_OK;
 }
diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c
index 1176c7a30b7..7e324db5589 100644
--- a/src/gallium/state_trackers/vdpau/presentation.c
+++ b/src/gallium/state_trackers/vdpau/presentation.c
@@ -169,8 +169,7 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue,
 
    vl_compositor_clear_layers(&pq->compositor);
    vl_compositor_set_rgba_layer(&pq->compositor, 0, surf->sampler_view, NULL, NULL);
-   vl_compositor_render(&pq->compositor, PIPE_MPEG12_PICTURE_TYPE_FRAME,
-                        drawable_surface, NULL, NULL);
+   vl_compositor_render(&pq->compositor, drawable_surface, NULL, NULL);
 
    pq->device->context->pipe->screen->flush_frontbuffer
    (
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index a4a27cc5b96..fd7d228c996 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -42,24 +42,6 @@
 
 #include "xvmc_private.h"
 
-static enum pipe_mpeg12_picture_type PictureToPipe(int xvmc_pic)
-{
-   switch (xvmc_pic) {
-      case XVMC_TOP_FIELD:
-         return PIPE_MPEG12_PICTURE_TYPE_FIELD_TOP;
-      case XVMC_BOTTOM_FIELD:
-         return PIPE_MPEG12_PICTURE_TYPE_FIELD_BOTTOM;
-      case XVMC_FRAME_PICTURE:
-         return PIPE_MPEG12_PICTURE_TYPE_FRAME;
-      default:
-         assert(0);
-   }
-
-   XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized picture type 0x%08X.\n", xvmc_pic);
-
-   return -1;
-}
-
 static void
 MacroBlocksToPipe(XvMCContextPrivate *context,
                   XvMCSurfacePrivate *surface,
@@ -447,7 +429,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    // Workaround for r600g, there seems to be a bug in the fence refcounting code
    pipe->screen->fence_reference(pipe->screen, &surface_priv->fence, NULL);
 
-   vl_compositor_render(compositor, PictureToPipe(flags), context_priv->drawable_surface, &dst_rect, NULL);
+   vl_compositor_render(compositor, context_priv->drawable_surface, &dst_rect, NULL);
                         
    pipe->flush(pipe, &surface_priv->fence);
 

From 2e62b30826679e9d5e1a783dc19baabec4fc8dfa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <deathsimple@vodafone.de>
Date: Fri, 12 Aug 2011 13:29:00 +0200
Subject: [PATCH 585/600] g3dvl: Rework the decoder interface part 5/5
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make setting the quant matrixes a generic interface.
Also removes setting the quant matrix from the XvMC interface

Signed-off-by: Christian König <deathsimple@vodafone.de>
Reviewed-by: Younes Manton <younes.m@gmail.com>
---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c   | 15 +++++++++++----
 src/gallium/include/pipe/p_video_decoder.h     |  3 +--
 src/gallium/include/pipe/p_video_state.h       | 14 ++++++++++++++
 src/gallium/state_trackers/vdpau/decode.c      | 12 ++++++++----
 src/gallium/state_trackers/xorg/xvmc/surface.c | 12 ------------
 5 files changed, 34 insertions(+), 22 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 805a2215133..8100f80665a 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -513,15 +513,16 @@ vl_mpeg12_set_picture_parameters(struct pipe_video_decoder *decoder,
 
 static void
 vl_mpeg12_set_quant_matrix(struct pipe_video_decoder *decoder,
-                           const uint8_t intra_matrix[64],
-                           const uint8_t non_intra_matrix[64])
+                           const struct pipe_quant_matrix *matrix)
 {
    struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
+   const struct pipe_mpeg12_quant_matrix *m = (const struct pipe_mpeg12_quant_matrix *)matrix;
 
    assert(dec);
+   assert(matrix->codec == PIPE_VIDEO_CODEC_MPEG12);
 
-   memcpy(dec->intra_matrix, intra_matrix, 64);
-   memcpy(dec->non_intra_matrix, non_intra_matrix, 64);
+   memcpy(dec->intra_matrix, m->intra_matrix, 64);
+   memcpy(dec->non_intra_matrix, m->non_intra_matrix, 64);
 }
 
 static void
@@ -576,6 +577,9 @@ vl_mpeg12_begin_frame(struct pipe_video_decoder *decoder)
    buf = dec->current_buffer;
    assert(buf);
 
+   if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
+      dec->intra_matrix[0] = 1 << (7 - dec->picture_desc.intra_dc_precision);
+
    for (i = 0; i < VL_MAX_PLANES; ++i) {
       vl_zscan_upload_quant(&buf->zscan[i], dec->intra_matrix, true);
       vl_zscan_upload_quant(&buf->zscan[i], dec->non_intra_matrix, false);
@@ -1152,6 +1156,9 @@ vl_create_mpeg12_decoder(struct pipe_context *context,
    if (!init_pipe_state(dec))
       goto error_pipe_state;
 
+   memset(dec->intra_matrix, 0x10, 64);
+   memset(dec->non_intra_matrix, 0x10, 64);
+
    return &dec->base;
 
 error_pipe_state:
diff --git a/src/gallium/include/pipe/p_video_decoder.h b/src/gallium/include/pipe/p_video_decoder.h
index c7f5877858d..2aa4001c179 100644
--- a/src/gallium/include/pipe/p_video_decoder.h
+++ b/src/gallium/include/pipe/p_video_decoder.h
@@ -84,8 +84,7 @@ struct pipe_video_decoder
     * set the quantification matrixes
     */
    void (*set_quant_matrix)(struct pipe_video_decoder *decoder,
-                            const uint8_t intra_matrix[64],
-                            const uint8_t non_intra_matrix[64]);
+                            const struct pipe_quant_matrix *matrix);
 
    /**
     * set target where video data is decoded to
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index 8e68f27cbdb..8166ac76b63 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -100,6 +100,11 @@ struct pipe_picture_desc
    enum pipe_video_profile profile;
 };
 
+struct pipe_quant_matrix
+{
+   enum pipe_video_codec codec;
+};
+
 struct pipe_macroblock
 {
    enum pipe_video_codec codec;
@@ -116,9 +121,18 @@ struct pipe_mpeg12_picture_desc
    unsigned alternate_scan;
    unsigned intra_vlc_format;
    unsigned concealment_motion_vectors;
+   unsigned intra_dc_precision;
    unsigned f_code[2][2];
 };
 
+struct pipe_mpeg12_quant_matrix
+{
+   struct pipe_quant_matrix base;
+
+   const uint8_t *intra_matrix;
+   const uint8_t *non_intra_matrix;
+};
+
 struct pipe_mpeg12_macroblock
 {
    struct pipe_macroblock base;
diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 5ca40f71efe..50d63ea3f73 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -182,8 +182,8 @@ vlVdpDecoderRenderMpeg12(struct pipe_video_decoder *decoder,
                          VdpBitstreamBuffer const *bitstream_buffers)
 {
    struct pipe_mpeg12_picture_desc picture;
+   struct pipe_mpeg12_quant_matrix quant;
    struct pipe_video_buffer *ref_frames[2];
-   uint8_t intra_quantizer_matrix[64];
    unsigned i;
 
    VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Decoding MPEG2\n");
@@ -216,6 +216,7 @@ vlVdpDecoderRenderMpeg12(struct pipe_video_decoder *decoder,
    picture.alternate_scan = picture_info->alternate_scan;
    picture.intra_vlc_format = picture_info->intra_vlc_format;
    picture.concealment_motion_vectors = picture_info->concealment_motion_vectors;
+   picture.intra_dc_precision = picture_info->intra_dc_precision;
    picture.f_code[0][0] = picture_info->f_code[0][0] - 1;
    picture.f_code[0][1] = picture_info->f_code[0][1] - 1;
    picture.f_code[1][0] = picture_info->f_code[1][0] - 1;
@@ -223,9 +224,12 @@ vlVdpDecoderRenderMpeg12(struct pipe_video_decoder *decoder,
 
    decoder->set_picture_parameters(decoder, &picture.base);
 
-   memcpy(intra_quantizer_matrix, picture_info->intra_quantizer_matrix, sizeof(intra_quantizer_matrix));
-   intra_quantizer_matrix[0] = 1 << (7 - picture_info->intra_dc_precision);
-   decoder->set_quant_matrix(decoder, intra_quantizer_matrix, picture_info->non_intra_quantizer_matrix);
+   memset(&quant, 0, sizeof(quant));
+   quant.base.codec = PIPE_VIDEO_CODEC_MPEG12;
+   quant.intra_matrix = picture_info->intra_quantizer_matrix;
+   quant.non_intra_matrix = picture_info->non_intra_quantizer_matrix;
+
+   decoder->set_quant_matrix(decoder, &quant.base);
 
    decoder->begin_frame(decoder);
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index fd7d228c996..79bd9c618ce 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -161,17 +161,6 @@ RecursiveEndFrame(XvMCSurfacePrivate *surface)
 PUBLIC
 Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surface)
 {
-   static const uint8_t dummy_quant[64] = {
-      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10
-   };
-
    XvMCContextPrivate *context_priv;
    struct pipe_context *pipe;
    XvMCSurfacePrivate *surface_priv;
@@ -193,7 +182,6 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
       return BadAlloc;
 
    surface_priv->decode_buffer = context_priv->decoder->create_buffer(context_priv->decoder);
-   context_priv->decoder->set_quant_matrix(context_priv->decoder, dummy_quant, dummy_quant);
    surface_priv->video_buffer = pipe->create_video_buffer
    (
       pipe, PIPE_FORMAT_NV12, context_priv->decoder->chroma_format,

From 31096e13f858daf896c0c53077fb25e92da089a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <deathsimple@vodafone.de>
Date: Wed, 24 Aug 2011 22:10:42 +0200
Subject: [PATCH 586/600] g3dvl: Use a single texture for luma and chroma data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Christian König <deathsimple@vodafone.de>
Reviewed-by: Younes Manton <younes.m@gmail.com>
---
 .../auxiliary/vl/vl_mpeg12_bitstream.c        |  20 ++-
 .../auxiliary/vl/vl_mpeg12_bitstream.h        |   5 +-
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c  | 144 +++++++++---------
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h  |   8 +-
 src/gallium/auxiliary/vl/vl_vertex_buffers.c  |  49 +-----
 src/gallium/auxiliary/vl/vl_vertex_buffers.h  |   3 +-
 6 files changed, 90 insertions(+), 139 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index 61ffcd1b7b2..bc889292ad7 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -1199,6 +1199,7 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur
    bs->ycbcr_stream[cc]->y = y;
    bs->ycbcr_stream[cc]->intra = 1;
    bs->ycbcr_stream[cc]->coding = coding;
+   bs->ycbcr_stream[cc]->block_num = bs->block_num++;
 
    vl_vlc_needbits(&bs->vlc);
 
@@ -1218,11 +1219,11 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur
    else
       get_intra_block_B14(bs, quantizer_scale, dest);
 
-   memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64);
+   memcpy(bs->ycbcr_buffer, dest, sizeof(int16_t) * 64);
 
    bs->num_ycbcr_blocks[cc]++;
    bs->ycbcr_stream[cc]++;
-   bs->ycbcr_buffer[cc] += 64;
+   bs->ycbcr_buffer += 64;
 }
 
 static INLINE void
@@ -1235,6 +1236,7 @@ slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
    bs->ycbcr_stream[cc]->y = y;
    bs->ycbcr_stream[cc]->intra = 0;
    bs->ycbcr_stream[cc]->coding = coding;
+   bs->ycbcr_stream[cc]->block_num = bs->block_num++;
 
    memset(dest, 0, sizeof(int16_t) * 64);
    if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1)
@@ -1242,11 +1244,11 @@ slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
    else
       get_non_intra_block(bs, quantizer_scale, dest);
 
-   memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64);
+   memcpy(bs->ycbcr_buffer, dest, sizeof(int16_t) * 64);
 
    bs->num_ycbcr_blocks[cc]++;
    bs->ycbcr_stream[cc]++;
-   bs->ycbcr_buffer[cc] += 64;
+   bs->ycbcr_buffer += 64;
 }
 
 static INLINE void
@@ -1788,7 +1790,7 @@ vl_mpg12_bs_init(struct vl_mpg12_bs *bs, unsigned width, unsigned height)
 
 void
 vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct vl_ycbcr_block *ycbcr_stream[VL_MAX_PLANES],
-                        short *ycbcr_buffer[VL_MAX_PLANES], struct vl_motionvector *mv_stream[VL_MAX_REF_FRAMES])
+                        short *ycbcr_buffer, struct vl_motionvector *mv_stream[VL_MAX_REF_FRAMES])
 {
    unsigned i;
 
@@ -1796,10 +1798,12 @@ vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct vl_ycbcr_block *ycbcr_str
    assert(ycbcr_stream && ycbcr_buffer);
    assert(mv_stream);
 
-   for (i = 0; i < VL_MAX_PLANES; ++i) {
+   bs->block_num = 0;
+
+   for (i = 0; i < VL_MAX_PLANES; ++i)
       bs->ycbcr_stream[i] = ycbcr_stream[i];
-      bs->ycbcr_buffer[i] = ycbcr_buffer[i];
-   }
+   bs->ycbcr_buffer = ycbcr_buffer;
+
    for (i = 0; i < VL_MAX_REF_FRAMES; ++i)
       bs->mv_stream[i] = mv_stream[i];
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h
index 8a35dc49d2d..797a7e792a8 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h
@@ -38,10 +38,11 @@ struct vl_mpg12_bs
 
    struct vl_vlc vlc;
 
+   unsigned block_num;
    unsigned *num_ycbcr_blocks;
 
    struct vl_ycbcr_block *ycbcr_stream[VL_MAX_PLANES];
-   short *ycbcr_buffer[VL_MAX_PLANES];
+   short *ycbcr_buffer;
 
    struct vl_motionvector *mv_stream[VL_MAX_REF_FRAMES];
 };
@@ -51,7 +52,7 @@ vl_mpg12_bs_init(struct vl_mpg12_bs *bs, unsigned width, unsigned height);
 
 void
 vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct vl_ycbcr_block *ycbcr_stream[VL_MAX_PLANES],
-                        short *ycbcr_buffer[VL_MAX_PLANES], struct vl_motionvector *mv_stream[VL_MAX_REF_FRAMES]);
+                        short *ycbcr_buffer, struct vl_motionvector *mv_stream[VL_MAX_REF_FRAMES]);
 
 void
 vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer,
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 8100f80665a..a27066765eb 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -30,6 +30,7 @@
 
 #include <util/u_memory.h>
 #include <util/u_rect.h>
+#include <util/u_sampler.h>
 #include <util/u_video.h>
 
 #include "vl_mpeg12_decoder.h"
@@ -84,29 +85,35 @@ static const unsigned const_empty_block_mask_420[3][2][2] = {
 static bool
 init_zscan_buffer(struct vl_mpeg12_decoder *dec, struct vl_mpeg12_buffer *buffer)
 {
-   enum pipe_format formats[3];
-
-   struct pipe_sampler_view **source;
+   struct pipe_resource *res, res_tmpl;
+   struct pipe_sampler_view sv_tmpl;
    struct pipe_surface **destination;
 
    unsigned i;
 
    assert(dec && buffer);
 
-   formats[0] = formats[1] = formats[2] = dec->zscan_source_format;
-   buffer->zscan_source = vl_video_buffer_create_ex
-   (
-      dec->base.context,
-      dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT,
-      align(dec->num_blocks, dec->blocks_per_line) / dec->blocks_per_line,
-      1, PIPE_VIDEO_CHROMA_FORMAT_444, formats, PIPE_USAGE_STATIC
-   );
+   memset(&res_tmpl, 0, sizeof(res_tmpl));
+   res_tmpl.target = PIPE_TEXTURE_2D;
+   res_tmpl.format = dec->zscan_source_format;
+   res_tmpl.width0 = dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT;
+   res_tmpl.height0 = align(dec->num_blocks, dec->blocks_per_line) / dec->blocks_per_line;
+   res_tmpl.depth0 = 1;
+   res_tmpl.array_size = 1;
+   res_tmpl.usage = PIPE_USAGE_STREAM;
+   res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW;
 
-   if (!buffer->zscan_source)
+   res = dec->base.context->screen->resource_create(dec->base.context->screen, &res_tmpl);
+   if (!res)
       goto error_source;
 
-   source = buffer->zscan_source->get_sampler_view_planes(buffer->zscan_source);
-   if (!source)
+
+   memset(&sv_tmpl, 0, sizeof(sv_tmpl));
+   u_sampler_view_default_template(&sv_tmpl, res, res->format);
+   sv_tmpl.swizzle_r = sv_tmpl.swizzle_g = sv_tmpl.swizzle_b = sv_tmpl.swizzle_a = PIPE_SWIZZLE_RED;
+   buffer->zscan_source = dec->base.context->create_sampler_view(dec->base.context, res, &sv_tmpl);
+   pipe_resource_reference(&res, NULL);
+   if (!buffer->zscan_source)
       goto error_sampler;
 
    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
@@ -119,7 +126,7 @@ init_zscan_buffer(struct vl_mpeg12_decoder *dec, struct vl_mpeg12_buffer *buffer
 
    for (i = 0; i < VL_MAX_PLANES; ++i)
       if (!vl_zscan_init_buffer(i == 0 ? &dec->zscan_y : &dec->zscan_c,
-                                &buffer->zscan[i], source[i], destination[i]))
+                                &buffer->zscan[i], buffer->zscan_source, destination[i]))
          goto error_plane;
 
    return true;
@@ -130,7 +137,7 @@ error_plane:
 
 error_surface:
 error_sampler:
-   buffer->zscan_source->destroy(buffer->zscan_source);
+   pipe_sampler_view_reference(&buffer->zscan_source, NULL);
 
 error_source:
    return false;
@@ -145,7 +152,8 @@ cleanup_zscan_buffer(struct vl_mpeg12_buffer *buffer)
 
    for (i = 0; i < VL_MAX_PLANES; ++i)
       vl_zscan_cleanup_buffer(&buffer->zscan[i]);
-   buffer->zscan_source->destroy(buffer->zscan_source);
+
+   pipe_sampler_view_reference(&buffer->zscan_source, NULL);
 }
 
 static bool
@@ -321,8 +329,7 @@ UploadYcbcrBlocks(struct vl_mpeg12_decoder *dec,
                   const struct pipe_mpeg12_macroblock *mb)
 {
    unsigned intra;
-   unsigned tb, x, y, luma_blocks;
-   short *blocks;
+   unsigned tb, x, y, num_blocks = 0;
 
    assert(dec && buf);
    assert(mb);
@@ -330,10 +337,9 @@ UploadYcbcrBlocks(struct vl_mpeg12_decoder *dec,
    if (!mb->coded_block_pattern)
       return;
 
-   blocks = mb->blocks;
    intra = mb->macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA ? 1 : 0;
 
-   for (y = 0, luma_blocks = 0; y < 2; ++y) {
+   for (y = 0; y < 2; ++y) {
       for (x = 0; x < 2; ++x, ++tb) {
          if (mb->coded_block_pattern & const_empty_block_mask_420[0][y][x]) {
 
@@ -342,21 +348,16 @@ UploadYcbcrBlocks(struct vl_mpeg12_decoder *dec,
             stream->y = mb->y * 2 + y;
             stream->intra = intra;
             stream->coding = mb->macroblock_modes.bits.dct_type;
+            stream->block_num = buf->block_num++;
 
             buf->num_ycbcr_blocks[0]++;
             buf->ycbcr_stream[0]++;
 
-            luma_blocks++;
+            num_blocks++;
          }
       }
    }
 
-   if (luma_blocks > 0) {
-      memcpy(buf->texels[0], blocks, 64 * sizeof(short) * luma_blocks);
-      buf->texels[0] += 64 * luma_blocks;
-      blocks += 64 * luma_blocks;
-   }
-
    /* TODO: Implement 422, 444 */
    //assert(ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
 
@@ -368,15 +369,17 @@ UploadYcbcrBlocks(struct vl_mpeg12_decoder *dec,
          stream->y = mb->y;
          stream->intra = intra;
          stream->coding = 0;
+         stream->block_num = buf->block_num++;
 
          buf->num_ycbcr_blocks[tb]++;
          buf->ycbcr_stream[tb]++;
 
-         memcpy(buf->texels[tb], blocks, 64 * sizeof(short));
-         buf->texels[tb] += 64;
-         blocks += 64;
+         num_blocks++;
       }
    }
+
+   memcpy(buf->texels, mb->blocks, 64 * sizeof(short) * num_blocks);
+   buf->texels += 64 * num_blocks;
 }
 
 static void
@@ -411,7 +414,6 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
 
    pipe_resource_reference(&dec->quads.buffer, NULL);
    pipe_resource_reference(&dec->pos.buffer, NULL);
-   pipe_resource_reference(&dec->block_num.buffer, NULL);
 
    pipe_sampler_view_reference(&dec->zscan_linear, NULL);
    pipe_sampler_view_reference(&dec->zscan_normal, NULL);
@@ -567,9 +569,11 @@ static void
 vl_mpeg12_begin_frame(struct pipe_video_decoder *decoder)
 {
    struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
-
    struct vl_mpeg12_buffer *buf;
-   struct pipe_sampler_view **sampler_views;
+
+   struct pipe_resource *tex;
+   struct pipe_box rect = { 0, 0, 0, 1, 1, 1 };
+
    unsigned i;
 
    assert(dec);
@@ -587,35 +591,25 @@ vl_mpeg12_begin_frame(struct pipe_video_decoder *decoder)
 
    vl_vb_map(&buf->vertex_stream, dec->base.context);
 
-   sampler_views = buf->zscan_source->get_sampler_view_planes(buf->zscan_source);
+   tex = buf->zscan_source->texture;
+   rect.width = tex->width0;
+   rect.height = tex->height0;
 
-   assert(sampler_views);
+   buf->tex_transfer = dec->base.context->get_transfer
+   (
+      dec->base.context, tex,
+      0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &rect
+   );
+
+   buf->block_num = 0;
+   buf->texels = dec->base.context->transfer_map(dec->base.context, buf->tex_transfer);
 
    for (i = 0; i < VL_MAX_PLANES; ++i) {
-      struct pipe_resource *tex = sampler_views[i]->texture;
-      struct pipe_box rect =
-      {
-         0, 0, 0,
-         tex->width0,
-         tex->height0,
-         1
-      };
-
-      buf->tex_transfer[i] = dec->base.context->get_transfer
-      (
-         dec->base.context, tex,
-         0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-         &rect
-      );
-
-      buf->texels[i] = dec->base.context->transfer_map(dec->base.context, buf->tex_transfer[i]);
-
+      buf->ycbcr_stream[i] = vl_vb_get_ycbcr_stream(&buf->vertex_stream, i);
       buf->num_ycbcr_blocks[i] = 0;
    }
 
-   for (i = 0; i < VL_MAX_PLANES; ++i)
-      buf->ycbcr_stream[i] = vl_vb_get_ycbcr_stream(&buf->vertex_stream, i);
-
    for (i = 0; i < VL_MAX_REF_FRAMES; ++i)
       buf->mv_stream[i] = vl_vb_get_mv_stream(&buf->vertex_stream, i);
 
@@ -734,10 +728,8 @@ vl_mpeg12_end_frame(struct pipe_video_decoder *decoder)
 
    vl_vb_unmap(&buf->vertex_stream, dec->base.context);
 
-   for (i = 0; i < VL_MAX_PLANES; ++i) {
-      dec->base.context->transfer_unmap(dec->base.context, buf->tex_transfer[i]);
-      dec->base.context->transfer_destroy(dec->base.context, buf->tex_transfer[i]);
-   }
+   dec->base.context->transfer_unmap(dec->base.context, buf->tex_transfer);
+   dec->base.context->transfer_destroy(dec->base.context, buf->tex_transfer);
 
    vb[0] = dec->quads;
    vb[1] = dec->pos;
@@ -758,14 +750,12 @@ vl_mpeg12_end_frame(struct pipe_video_decoder *decoder)
       }
    }
 
-   vb[2] = dec->block_num;
-
    dec->base.context->bind_vertex_elements_state(dec->base.context, dec->ves_ycbcr);
    for (i = 0; i < VL_MAX_PLANES; ++i) {
       if (!buf->num_ycbcr_blocks[i]) continue;
 
       vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, i);
-      dec->base.context->set_vertex_buffers(dec->base.context, 3, vb);
+      dec->base.context->set_vertex_buffers(dec->base.context, 2, vb);
 
       vl_zscan_render(&buf->zscan[i] , buf->num_ycbcr_blocks[i]);
 
@@ -782,7 +772,7 @@ vl_mpeg12_end_frame(struct pipe_video_decoder *decoder)
          if (!buf->num_ycbcr_blocks[i]) continue;
 
          vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, component);
-         dec->base.context->set_vertex_buffers(dec->base.context, 3, vb);
+         dec->base.context->set_vertex_buffers(dec->base.context, 2, vb);
 
          if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
             vl_idct_prepare_stage2(&buf->idct[component]);
@@ -1085,31 +1075,33 @@ vl_create_mpeg12_decoder(struct pipe_context *context,
    dec->num_blocks = (dec->base.width * dec->base.height) / block_size_pixels;
    dec->width_in_macroblocks = align(dec->base.width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
 
-   dec->quads = vl_vb_upload_quads(dec->base.context);
-   dec->pos = vl_vb_upload_pos(
-      dec->base.context,
-      dec->base.width / MACROBLOCK_WIDTH,
-      dec->base.height / MACROBLOCK_HEIGHT
-   );
-   dec->block_num = vl_vb_upload_block_num(dec->base.context, dec->num_blocks);
-
-   dec->ves_ycbcr = vl_vb_get_ves_ycbcr(dec->base.context);
-   dec->ves_mv = vl_vb_get_ves_mv(dec->base.context);
-
    /* TODO: Implement 422, 444 */
    assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
 
    if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
       dec->chroma_width = dec->base.width / 2;
       dec->chroma_height = dec->base.height / 2;
+      dec->num_blocks = dec->num_blocks * 2;
    } else if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
       dec->chroma_width = dec->base.width;
       dec->chroma_height = dec->base.height / 2;
+      dec->num_blocks = dec->num_blocks * 2 + dec->num_blocks;
    } else {
       dec->chroma_width = dec->base.width;
       dec->chroma_height = dec->base.height;
+      dec->num_blocks = dec->num_blocks * 3;
    }
 
+   dec->quads = vl_vb_upload_quads(dec->base.context);
+   dec->pos = vl_vb_upload_pos(
+      dec->base.context,
+      dec->base.width / MACROBLOCK_WIDTH,
+      dec->base.height / MACROBLOCK_HEIGHT
+   );
+
+   dec->ves_ycbcr = vl_vb_get_ves_ycbcr(dec->base.context);
+   dec->ves_mv = vl_vb_get_ves_mv(dec->base.context);
+
    switch (entrypoint) {
    case PIPE_VIDEO_ENTRYPOINT_BITSTREAM:
       format_config = find_format_config(dec, bitstream_format_config, num_bitstream_format_configs);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index 277f5b90e4b..4a8d65335f6 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -55,7 +55,6 @@ struct vl_mpeg12_decoder
 
    struct pipe_vertex_buffer quads;
    struct pipe_vertex_buffer pos;
-   struct pipe_vertex_buffer block_num;
 
    void *ves_ycbcr;
    void *ves_mv;
@@ -87,17 +86,18 @@ struct vl_mpeg12_buffer
 {
    struct vl_vertex_buffer vertex_stream;
 
+   unsigned block_num;
    unsigned num_ycbcr_blocks[3];
 
-   struct pipe_video_buffer *zscan_source;
+   struct pipe_sampler_view *zscan_source;
 
    struct vl_mpg12_bs bs;
    struct vl_zscan_buffer zscan[VL_MAX_PLANES];
    struct vl_idct_buffer idct[VL_MAX_PLANES];
    struct vl_mc_buffer mc[VL_MAX_PLANES];
 
-   struct pipe_transfer *tex_transfer[VL_MAX_PLANES];
-   short *texels[VL_MAX_PLANES];
+   struct pipe_transfer *tex_transfer;
+   short *texels;
 
    struct vl_ycbcr_block *ycbcr_stream[VL_MAX_PLANES];
    struct vl_motionvector *mv_stream[VL_MAX_REF_FRAMES];
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index b7aa14bb4db..281db8018eb 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -125,49 +125,6 @@ vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height)
    return pos;
 }
 
-struct pipe_vertex_buffer
-vl_vb_upload_block_num(struct pipe_context *pipe, unsigned num_blocks)
-{
-   struct pipe_vertex_buffer buf;
-   struct pipe_transfer *buf_transfer;
-   struct vertex2s *v;
-   unsigned i;
-
-   assert(pipe);
-
-   /* create buffer */
-   buf.stride = sizeof(struct vertex2s);
-   buf.buffer_offset = 0;
-   buf.buffer = pipe_buffer_create
-   (
-      pipe->screen,
-      PIPE_BIND_VERTEX_BUFFER,
-      PIPE_USAGE_STATIC,
-      sizeof(struct vertex2s) * num_blocks
-   );
-
-   if(!buf.buffer)
-      return buf;
-
-   /* and fill it */
-   v = pipe_buffer_map
-   (
-      pipe,
-      buf.buffer,
-      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-      &buf_transfer
-   );
-
-   for ( i = 0; i < num_blocks; ++i, ++v) {
-      v->x = i;
-      v->y = i;
-   }
-
-   pipe_buffer_unmap(pipe, buf_transfer);
-
-   return buf;
-}
-
 static struct pipe_vertex_element
 vl_vb_get_quad_vertex_element(void)
 {
@@ -211,12 +168,10 @@ vl_vb_get_ves_ycbcr(struct pipe_context *pipe)
    /* Position element */
    vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R8G8B8A8_USCALED;
 
-   vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 1, 1);
-
    /* block num element */
-   vertex_elems[VS_I_BLOCK_NUM].src_format = PIPE_FORMAT_R16G16_SSCALED;
+   vertex_elems[VS_I_BLOCK_NUM].src_format = PIPE_FORMAT_R32_FLOAT;
 
-   vl_vb_element_helper(&vertex_elems[VS_I_BLOCK_NUM], 1, 2);
+   vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 2, 1);
 
    return pipe->create_vertex_elements_state(pipe, 3, vertex_elems);
 }
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index 38db899916d..874ecce9041 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -85,6 +85,7 @@ struct vl_ycbcr_block
    uint8_t x, y;
    uint8_t intra;
    uint8_t coding;
+   float block_num;
 };
 
 struct vl_vertex_buffer
@@ -108,8 +109,6 @@ struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe);
 
 struct pipe_vertex_buffer vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height);
 
-struct pipe_vertex_buffer vl_vb_upload_block_num(struct pipe_context *pipe, unsigned num_blocks);
-
 void *vl_vb_get_ves_ycbcr(struct pipe_context *pipe);
 
 void *vl_vb_get_ves_mv(struct pipe_context *pipe);

From 9765dede7556f7ccfef1d90bab14a2bfa03384e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <deathsimple@vodafone.de>
Date: Wed, 24 Aug 2011 22:51:31 +0200
Subject: [PATCH 587/600] g3dvl: Rewrite the mpeg 1&2 bitstream parser
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Based on work of Maarten Lankhorst this time.

Signed-off-by: Christian König <deathsimple@vodafone.de>
Reviewed-by: Younes Manton <younes.m@gmail.com>
---
 .../auxiliary/vl/vl_mpeg12_bitstream.c        | 2646 ++++++-----------
 .../auxiliary/vl/vl_mpeg12_bitstream.h        |   23 +-
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c  |    8 +-
 src/gallium/auxiliary/vl/vl_vlc.h             |  197 +-
 src/gallium/include/pipe/p_video_state.h      |   11 +
 5 files changed, 1034 insertions(+), 1851 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index bc889292ad7..ef00e2d9466 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -1,6 +1,7 @@
 /**************************************************************************
  *
- * Copyright 2011 Christian König.
+ * Copyright 2011 Maarten Lankhorst
+ * Copyright 2011 Christian König
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,1817 +26,966 @@
  *
  **************************************************************************/
 
-/**
- * This file is based uppon slice_xvmc.c and vlc.h from the xine project,
- * which in turn is based on mpeg2dec. The following is the original copyright:
- *
- * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include <stdint.h>
-
-#include <pipe/p_compiler.h>
-#include <pipe/p_video_state.h>
+#include <pipe/p_video_decoder.h>
+#include <util/u_memory.h>
 
 #include "vl_vlc.h"
 #include "vl_mpeg12_bitstream.h"
 
-/* take num bits from the high part of bit_buf and zero extend them */
-#define UBITS(buf,num) (((uint32_t)(buf)) >> (32 - (num)))
+enum {
+   dct_End_of_Block = 0xFF,
+   dct_Escape = 0xFE,
+   dct_DC = 0xFD,
+   dct_AC = 0xFC
+};
 
-/* take num bits from the high part of bit_buf and sign extend them */
-#define SBITS(buf,num) (((int32_t)(buf)) >> (32 - (num)))
-
-/* macroblock modes */
-#define MACROBLOCK_INTRA 1
-#define MACROBLOCK_PATTERN 2
-#define MACROBLOCK_MOTION_BACKWARD 4
-#define MACROBLOCK_MOTION_FORWARD 8
-#define MACROBLOCK_QUANT 16
-
-/* motion_type */
-#define MOTION_TYPE_MASK (3*64)
-#define MOTION_TYPE_BASE 64
-#define MC_FIELD (1*64)
-#define MC_FRAME (2*64)
-#define MC_16X8 (2*64)
-#define MC_DMV (3*64)
-
-/* picture structure */
-#define TOP_FIELD     1
-#define BOTTOM_FIELD  2
-#define FRAME_PICTURE 3
-
-/* picture coding type (mpeg2 header) */
-#define I_TYPE 1
-#define P_TYPE 2
-#define B_TYPE 3
-#define D_TYPE 4
-
-typedef struct {
-   uint8_t modes;
-   uint8_t len;
-} MBtab;
-
-typedef struct {
-   uint8_t delta;
-   uint8_t len;
-} MVtab;
-
-typedef struct {
-   int8_t dmv;
-   uint8_t len;
-} DMVtab;
-
-typedef struct {
-   uint8_t cbp;
-   uint8_t len;
-} CBPtab;
-
-typedef struct {
-   uint8_t size;
-   uint8_t len;
-} DCtab;
-
-typedef struct {
+struct dct_coeff
+{
+   uint8_t length;
    uint8_t run;
-   uint8_t level;
-   uint8_t len;
-} DCTtab;
-
-typedef struct {
-   uint8_t mba;
-   uint8_t len;
-} MBAtab;
-
-#define INTRA MACROBLOCK_INTRA
-#define QUANT MACROBLOCK_QUANT
-#define MC MACROBLOCK_MOTION_FORWARD
-#define CODED MACROBLOCK_PATTERN
-#define FWD MACROBLOCK_MOTION_FORWARD
-#define BWD MACROBLOCK_MOTION_BACKWARD
-#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD
-
-static const MBtab MB_I [] = {
-   {INTRA|QUANT, 2}, {INTRA, 1}
+   int16_t level;
 };
 
-static const MBtab MB_P [] = {
-   {INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, {INTRA,    5},
-   {MC,          3}, {MC,          3}, {MC,             3}, {MC,       3},
-   {CODED,       2}, {CODED,       2}, {CODED,          2}, {CODED,    2},
-   {CODED,       2}, {CODED,       2}, {CODED,          2}, {CODED,    2},
-   {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
-   {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
-   {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
-   {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1}
-};
-
-static const MBtab MB_B [] = {
-   {0,                 0}, {INTRA|QUANT,       6},
-   {BWD|CODED|QUANT,   6}, {FWD|CODED|QUANT,   6},
-   {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5},
-                                     {INTRA,       5}, {INTRA,       5},
-   {FWD,         4}, {FWD,         4}, {FWD,         4}, {FWD,         4},
-   {FWD|CODED,   4}, {FWD|CODED,   4}, {FWD|CODED,   4}, {FWD|CODED,   4},
-   {BWD,         3}, {BWD,         3}, {BWD,         3}, {BWD,         3},
-   {BWD,         3}, {BWD,         3}, {BWD,         3}, {BWD,         3},
-   {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3},
-   {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3},
-   {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
-   {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
-   {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
-   {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
-   {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
-   {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
-   {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
-   {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}
-};
-
-#undef INTRA
-#undef QUANT
-#undef MC
-#undef CODED
-#undef FWD
-#undef BWD
-#undef INTER
-
-static const MVtab MV_4 [] = {
-   { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}
-};
-
-static const MVtab MV_10 [] = {
-   { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10},
-   { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10},
-   {11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9},
-   { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7},
-   { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7},
-   { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}
-};
-
-static const DMVtab DMV_2 [] = {
-   { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2}
-};
-
-static const CBPtab CBP_7 [] = {
-   {0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7},
-   {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7},
-   {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6},
-   {0x24, 6}, {0x24, 6}, {0x18, 6}, {0x18, 6},
-   {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, {0x3e, 5},
-   {0x02, 5}, {0x02, 5}, {0x02, 5}, {0x02, 5},
-   {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, {0x3d, 5},
-   {0x01, 5}, {0x01, 5}, {0x01, 5}, {0x01, 5},
-   {0x38, 5}, {0x38, 5}, {0x38, 5}, {0x38, 5},
-   {0x34, 5}, {0x34, 5}, {0x34, 5}, {0x34, 5},
-   {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, {0x2c, 5},
-   {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, {0x1c, 5},
-   {0x28, 5}, {0x28, 5}, {0x28, 5}, {0x28, 5},
-   {0x14, 5}, {0x14, 5}, {0x14, 5}, {0x14, 5},
-   {0x30, 5}, {0x30, 5}, {0x30, 5}, {0x30, 5},
-   {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5},
-   {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4},
-   {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4},
-   {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4},
-   {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4},
-   {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4},
-   {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4},
-   {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4},
-   {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4},
-   {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3},
-   {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3},
-   {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3},
-   {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}
-};
-
-static const CBPtab CBP_9 [] = {
-   {0,    0}, {0x00, 9}, {0x27, 9}, {0x1b, 9},
-   {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9},
-   {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8},
-   {0x2e, 8}, {0x2e, 8}, {0x1e, 8}, {0x1e, 8},
-   {0x39, 8}, {0x39, 8}, {0x35, 8}, {0x35, 8},
-   {0x2d, 8}, {0x2d, 8}, {0x1d, 8}, {0x1d, 8},
-   {0x26, 8}, {0x26, 8}, {0x1a, 8}, {0x1a, 8},
-   {0x25, 8}, {0x25, 8}, {0x19, 8}, {0x19, 8},
-   {0x2b, 8}, {0x2b, 8}, {0x17, 8}, {0x17, 8},
-   {0x33, 8}, {0x33, 8}, {0x0f, 8}, {0x0f, 8},
-   {0x2a, 8}, {0x2a, 8}, {0x16, 8}, {0x16, 8},
-   {0x32, 8}, {0x32, 8}, {0x0e, 8}, {0x0e, 8},
-   {0x29, 8}, {0x29, 8}, {0x15, 8}, {0x15, 8},
-   {0x31, 8}, {0x31, 8}, {0x0d, 8}, {0x0d, 8},
-   {0x23, 8}, {0x23, 8}, {0x13, 8}, {0x13, 8},
-   {0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8}
-};
-
-static const DCtab DC_lum_5 [] = {
-   {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
-   {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
-   {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3},
-   {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}
-};
-
-static const DCtab DC_chrom_5 [] = {
-   {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2},
-   {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
-   {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
-   {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}
-};
-
-static const DCtab DC_long [] = {
-   {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5},
-   {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5},
-   {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6},
-   {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9}
-};
-
-static const DCTtab DCT_16 [] = {
-   {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
-   {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
-   {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
-   {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
-   {  2,18, 0}, {  2,17, 0}, {  2,16, 0}, {  2,15, 0},
-   {  7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0},
-   { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0},
-   { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0}
-};
-
-static const DCTtab DCT_15 [] = {
-   {  1,40,15}, {  1,39,15}, {  1,38,15}, {  1,37,15},
-   {  1,36,15}, {  1,35,15}, {  1,34,15}, {  1,33,15},
-   {  1,32,15}, {  2,14,15}, {  2,13,15}, {  2,12,15},
-   {  2,11,15}, {  2,10,15}, {  2, 9,15}, {  2, 8,15},
-   {  1,31,14}, {  1,31,14}, {  1,30,14}, {  1,30,14},
-   {  1,29,14}, {  1,29,14}, {  1,28,14}, {  1,28,14},
-   {  1,27,14}, {  1,27,14}, {  1,26,14}, {  1,26,14},
-   {  1,25,14}, {  1,25,14}, {  1,24,14}, {  1,24,14},
-   {  1,23,14}, {  1,23,14}, {  1,22,14}, {  1,22,14},
-   {  1,21,14}, {  1,21,14}, {  1,20,14}, {  1,20,14},
-   {  1,19,14}, {  1,19,14}, {  1,18,14}, {  1,18,14},
-   {  1,17,14}, {  1,17,14}, {  1,16,14}, {  1,16,14}
-};
-
-static const DCTtab DCT_13 [] = {
-   { 11, 2,13}, { 10, 2,13}, {  6, 3,13}, {  4, 4,13},
-   {  3, 5,13}, {  2, 7,13}, {  2, 6,13}, {  1,15,13},
-   {  1,14,13}, {  1,13,13}, {  1,12,13}, { 27, 1,13},
-   { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13},
-   {  1,11,12}, {  1,11,12}, {  9, 2,12}, {  9, 2,12},
-   {  5, 3,12}, {  5, 3,12}, {  1,10,12}, {  1,10,12},
-   {  3, 4,12}, {  3, 4,12}, {  8, 2,12}, {  8, 2,12},
-   { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12},
-   {  1, 9,12}, {  1, 9,12}, { 20, 1,12}, { 20, 1,12},
-   { 19, 1,12}, { 19, 1,12}, {  2, 5,12}, {  2, 5,12},
-   {  4, 3,12}, {  4, 3,12}, {  1, 8,12}, {  1, 8,12},
-   {  7, 2,12}, {  7, 2,12}, { 18, 1,12}, { 18, 1,12}
-};
-
-static const DCTtab DCT_B14_10 [] = {
-   { 17, 1,10}, {  6, 2,10}, {  1, 7,10}, {  3, 3,10},
-   {  2, 4,10}, { 16, 1,10}, { 15, 1,10}, {  5, 2,10}
-};
-
-static const DCTtab DCT_B14_8 [] = {
-   { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6},
-   {  3, 2, 7}, {  3, 2, 7}, { 10, 1, 7}, { 10, 1, 7},
-   {  1, 4, 7}, {  1, 4, 7}, {  9, 1, 7}, {  9, 1, 7},
-   {  8, 1, 6}, {  8, 1, 6}, {  8, 1, 6}, {  8, 1, 6},
-   {  7, 1, 6}, {  7, 1, 6}, {  7, 1, 6}, {  7, 1, 6},
-   {  2, 2, 6}, {  2, 2, 6}, {  2, 2, 6}, {  2, 2, 6},
-   {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6},
-   { 14, 1, 8}, {  1, 6, 8}, { 13, 1, 8}, { 12, 1, 8},
-   {  4, 2, 8}, {  2, 3, 8}, {  1, 5, 8}, { 11, 1, 8}
-};
-
-static const DCTtab DCT_B14AC_5 [] = {
-                {  1, 3, 5}, {  5, 1, 5}, {  4, 1, 5},
-   {  1, 2, 4}, {  1, 2, 4}, {  3, 1, 4}, {  3, 1, 4},
-   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-   {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2},
-   {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}
-};
-
-static const DCTtab DCT_B14DC_5 [] = {
-                {  1, 3, 5}, {  5, 1, 5}, {  4, 1, 5},
-   {  1, 2, 4}, {  1, 2, 4}, {  3, 1, 4}, {  3, 1, 4},
-   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-   {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
-   {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
-   {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
-   {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}
-};
-
-static const DCTtab DCT_B15_10 [] = {
-   {  6, 2, 9}, {  6, 2, 9}, { 15, 1, 9}, { 15, 1, 9},
-   {  3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9}
-};
-
-static const DCTtab DCT_B15_8 [] = {
-   { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6},
-   {  8, 1, 7}, {  8, 1, 7}, {  9, 1, 7}, {  9, 1, 7},
-   {  7, 1, 7}, {  7, 1, 7}, {  3, 2, 7}, {  3, 2, 7},
-   {  1, 7, 6}, {  1, 7, 6}, {  1, 7, 6}, {  1, 7, 6},
-   {  1, 6, 6}, {  1, 6, 6}, {  1, 6, 6}, {  1, 6, 6},
-   {  5, 1, 6}, {  5, 1, 6}, {  5, 1, 6}, {  5, 1, 6},
-   {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6},
-   {  2, 5, 8}, { 12, 1, 8}, {  1,11, 8}, {  1,10, 8},
-   { 14, 1, 8}, { 13, 1, 8}, {  4, 2, 8}, {  2, 4, 8},
-   {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5},
-   {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5},
-   {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5},
-   {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5},
-   {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5},
-   {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5},
-   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
-   {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
-   {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
-   {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
-   {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
-   {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
-   {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
-   {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
-   {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
-   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
-   {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5},
-   {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5},
-   {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5},
-   {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5},
-   { 10, 1, 7}, { 10, 1, 7}, {  2, 3, 7}, {  2, 3, 7},
-   { 11, 1, 7}, { 11, 1, 7}, {  1, 8, 7}, {  1, 8, 7},
-   {  1, 9, 7}, {  1, 9, 7}, {  1,12, 8}, {  1,13, 8},
-   {  3, 3, 8}, {  5, 2, 8}, {  1,14, 8}, {  1,15, 8}
-};
-
-static const MBAtab MBA_5 [] = {
-                   {6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4},
-   {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3},
-   {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1},
-   {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}
-};
-
-static const MBAtab MBA_11 [] = {
-   {32, 11}, {31, 11}, {30, 11}, {29, 11},
-   {28, 11}, {27, 11}, {26, 11}, {25, 11},
-   {24, 11}, {23, 11}, {22, 11}, {21, 11},
-   {20, 10}, {20, 10}, {19, 10}, {19, 10},
-   {18, 10}, {18, 10}, {17, 10}, {17, 10},
-   {16, 10}, {16, 10}, {15, 10}, {15, 10},
-   {14,  8}, {14,  8}, {14,  8}, {14,  8},
-   {14,  8}, {14,  8}, {14,  8}, {14,  8},
-   {13,  8}, {13,  8}, {13,  8}, {13,  8},
-   {13,  8}, {13,  8}, {13,  8}, {13,  8},
-   {12,  8}, {12,  8}, {12,  8}, {12,  8},
-   {12,  8}, {12,  8}, {12,  8}, {12,  8},
-   {11,  8}, {11,  8}, {11,  8}, {11,  8},
-   {11,  8}, {11,  8}, {11,  8}, {11,  8},
-   {10,  8}, {10,  8}, {10,  8}, {10,  8},
-   {10,  8}, {10,  8}, {10,  8}, {10,  8},
-   { 9,  8}, { 9,  8}, { 9,  8}, { 9,  8},
-   { 9,  8}, { 9,  8}, { 9,  8}, { 9,  8},
-   { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
-   { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
-   { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
-   { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
-   { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
-   { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
-   { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
-   { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7}
-};
-
-static const int non_linear_quantizer_scale[] = {
-   0,  1,  2,  3,  4,  5,   6,   7,
-   8, 10, 12, 14, 16, 18,  20,  22,
-   24, 28, 32, 36, 40, 44,  48,  52,
-   56, 64, 72, 80, 88, 96, 104, 112
-};
-
-static INLINE int
-get_macroblock_modes(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture)
+struct dct_coeff_compressed
 {
-   int macroblock_modes;
-   const MBtab * tab;
+   uint32_t bitcode;
+   struct dct_coeff coeff;
+};
 
-   switch (picture->picture_coding_type) {
-   case I_TYPE:
+/* coding table as found in the spec annex B.5 table B-1 */
+static const struct vl_vlc_compressed macroblock_address_increment[] = {
+   { 0x8000, { 1, 1 } },
+   { 0x6000, { 3, 2 } },
+   { 0x4000, { 3, 3 } },
+   { 0x3000, { 4, 4 } },
+   { 0x2000, { 4, 5 } },
+   { 0x1800, { 5, 6 } },
+   { 0x1000, { 5, 7 } },
+   { 0x0e00, { 7, 8 } },
+   { 0x0c00, { 7, 9 } },
+   { 0x0b00, { 8, 10 } },
+   { 0x0a00, { 8, 11 } },
+   { 0x0900, { 8, 12 } },
+   { 0x0800, { 8, 13 } },
+   { 0x0700, { 8, 14 } },
+   { 0x0600, { 8, 15 } },
+   { 0x05c0, { 10, 16 } },
+   { 0x0580, { 10, 17 } },
+   { 0x0540, { 10, 18 } },
+   { 0x0500, { 10, 19 } },
+   { 0x04c0, { 10, 20 } },
+   { 0x0480, { 10, 21 } },
+   { 0x0460, { 11, 22 } },
+   { 0x0440, { 11, 23 } },
+   { 0x0420, { 11, 24 } },
+   { 0x0400, { 11, 25 } },
+   { 0x03e0, { 11, 26 } },
+   { 0x03c0, { 11, 27 } },
+   { 0x03a0, { 11, 28 } },
+   { 0x0380, { 11, 29 } },
+   { 0x0360, { 11, 30 } },
+   { 0x0340, { 11, 31 } },
+   { 0x0320, { 11, 32 } },
+   { 0x0300, { 11, 33 } }
+};
 
-      tab = MB_I + vl_vlc_ubits(&bs->vlc, 1);
-      vl_vlc_dumpbits(&bs->vlc, tab->len);
-      macroblock_modes = tab->modes;
+#define Q PIPE_MPEG12_MB_TYPE_QUANT
+#define F PIPE_MPEG12_MB_TYPE_MOTION_FORWARD
+#define B PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD
+#define P PIPE_MPEG12_MB_TYPE_PATTERN
+#define I PIPE_MPEG12_MB_TYPE_INTRA
 
-      return macroblock_modes;
+/* coding table as found in the spec annex B.5 table B-2 */
+static const struct vl_vlc_compressed macroblock_type_i[] = {
+   { 0x8000, { 1, I } },
+   { 0x4000, { 2, Q|I } }
+};
 
-   case P_TYPE:
+/* coding table as found in the spec annex B.5 table B-3 */
+static const struct vl_vlc_compressed macroblock_type_p[] = {
+   { 0x8000, { 1, F|P } },
+   { 0x4000, { 2, P } },
+   { 0x2000, { 3, F } },
+   { 0x1800, { 5, I } },
+   { 0x1000, { 5, Q|F|P } },
+   { 0x0800, { 5, Q|P } },
+   { 0x0400, { 6, Q|I } }
+};
 
-      tab = MB_P + vl_vlc_ubits(&bs->vlc, 5);
-      vl_vlc_dumpbits(&bs->vlc, tab->len);
-      macroblock_modes = tab->modes;
+/* coding table as found in the spec annex B.5 table B-4 */
+static const struct vl_vlc_compressed macroblock_type_b[] = {
+   { 0x8000, { 2, F|B } },
+   { 0xC000, { 2, F|B|P } },
+   { 0x4000, { 3, B } },
+   { 0x6000, { 3, B|P } },
+   { 0x2000, { 4, F } },
+   { 0x3000, { 4, F|P } },
+   { 0x1800, { 5, I } },
+   { 0x1000, { 5, Q|F|B|P } },
+   { 0x0C00, { 6, Q|F|P } },
+   { 0x0800, { 6, Q|B|P } },
+   { 0x0400, { 6, Q|I } }
+};
 
-      if (picture->picture_structure != FRAME_PICTURE) {
-         if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) {
-            macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE;
-            vl_vlc_dumpbits(&bs->vlc, 2);
-          }
-          return macroblock_modes;
-      } else if (picture->frame_pred_frame_dct) {
-          if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
-            macroblock_modes |= MC_FRAME;
-          return macroblock_modes;
-      } else {
-          if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) {
-            macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE;
-            vl_vlc_dumpbits(&bs->vlc, 2);
-          }
-          return macroblock_modes;
-      }
+#undef Q
+#undef F
+#undef B
+#undef P
+#undef I
 
-   case B_TYPE:
+/* coding table as found in the spec annex B.5 table B-9 */
+static const struct vl_vlc_compressed coded_block_pattern[] = {
+   { 0xE000, { 3, 60 } },
+   { 0xD000, { 4, 4 } },
+   { 0xC000, { 4, 8 } },
+   { 0xB000, { 4, 16 } },
+   { 0xA000, { 4, 32 } },
+   { 0x9800, { 5, 12 } },
+   { 0x9000, { 5, 48 } },
+   { 0x8800, { 5, 20 } },
+   { 0x8000, { 5, 40 } },
+   { 0x7800, { 5, 28 } },
+   { 0x7000, { 5, 44 } },
+   { 0x6800, { 5, 52 } },
+   { 0x6000, { 5, 56 } },
+   { 0x5800, { 5, 1 } },
+   { 0x5000, { 5, 61 } },
+   { 0x4800, { 5, 2 } },
+   { 0x4000, { 5, 62 } },
+   { 0x3C00, { 6, 24 } },
+   { 0x3800, { 6, 36 } },
+   { 0x3400, { 6, 3 } },
+   { 0x3000, { 6, 63 } },
+   { 0x2E00, { 7, 5 } },
+   { 0x2C00, { 7, 9 } },
+   { 0x2A00, { 7, 17 } },
+   { 0x2800, { 7, 33 } },
+   { 0x2600, { 7, 6 } },
+   { 0x2400, { 7, 10 } },
+   { 0x2200, { 7, 18 } },
+   { 0x2000, { 7, 34 } },
+   { 0x1F00, { 8, 7 } },
+   { 0x1E00, { 8, 11 } },
+   { 0x1D00, { 8, 19 } },
+   { 0x1C00, { 8, 35 } },
+   { 0x1B00, { 8, 13 } },
+   { 0x1A00, { 8, 49 } },
+   { 0x1900, { 8, 21 } },
+   { 0x1800, { 8, 41 } },
+   { 0x1700, { 8, 14 } },
+   { 0x1600, { 8, 50 } },
+   { 0x1500, { 8, 22 } },
+   { 0x1400, { 8, 42 } },
+   { 0x1300, { 8, 15 } },
+   { 0x1200, { 8, 51 } },
+   { 0x1100, { 8, 23 } },
+   { 0x1000, { 8, 43 } },
+   { 0x0F00, { 8, 25 } },
+   { 0x0E00, { 8, 37 } },
+   { 0x0D00, { 8, 26 } },
+   { 0x0C00, { 8, 38 } },
+   { 0x0B00, { 8, 29 } },
+   { 0x0A00, { 8, 45 } },
+   { 0x0900, { 8, 53 } },
+   { 0x0800, { 8, 57 } },
+   { 0x0700, { 8, 30 } },
+   { 0x0600, { 8, 46 } },
+   { 0x0500, { 8, 54 } },
+   { 0x0400, { 8, 58 } },
+   { 0x0380, { 9, 31 } },
+   { 0x0300, { 9, 47 } },
+   { 0x0280, { 9, 55 } },
+   { 0x0200, { 9, 59 } },
+   { 0x0180, { 9, 27 } },
+   { 0x0100, { 9, 39 } },
+   { 0x0080, { 9, 0 } }
+};
 
-      tab = MB_B + vl_vlc_ubits(&bs->vlc, 6);
-      vl_vlc_dumpbits(&bs->vlc, tab->len);
-      macroblock_modes = tab->modes;
+/* coding table as found in the spec annex B.5 table B-10 */
+static const struct vl_vlc_compressed motion_code[] = {
+   { 0x0320, { 11, -16 } },
+   { 0x0360, { 11, -15 } },
+   { 0x03a0, { 11, -14 } },
+   { 0x03e0, { 11, -13 } },
+   { 0x0420, { 11, -12 } },
+   { 0x0460, { 11, -11 } },
+   { 0x04c0, { 10, -10 } },
+   { 0x0540, { 10, -9 } },
+   { 0x05c0, { 10, -8 } },
+   { 0x0700, { 8, -7 } },
+   { 0x0900, { 8, -6 } },
+   { 0x0b00, { 8, -5 } },
+   { 0x0e00, { 7, -4 } },
+   { 0x1800, { 5, -3 } },
+   { 0x3000, { 4, -2 } },
+   { 0x6000, { 3, -1 } },
+   { 0x8000, { 1, 0 } },
+   { 0x4000, { 3, 1 } },
+   { 0x2000, { 4, 2 } },
+   { 0x1000, { 5, 3 } },
+   { 0x0c00, { 7, 4 } },
+   { 0x0a00, { 8, 5 } },
+   { 0x0800, { 8, 6 } },
+   { 0x0600, { 8, 7 } },
+   { 0x0580, { 10, 8 } },
+   { 0x0500, { 10, 9 } },
+   { 0x0480, { 10, 10 } },
+   { 0x0440, { 11, 11 } },
+   { 0x0400, { 11, 12 } },
+   { 0x03c0, { 11, 13 } },
+   { 0x0380, { 11, 14 } },
+   { 0x0340, { 11, 15 } },
+   { 0x0300, { 11, 16 } }
+};
 
-      if (picture->picture_structure != FRAME_PICTURE) {
-          if (! (macroblock_modes & MACROBLOCK_INTRA)) {
-            macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE;
-            vl_vlc_dumpbits(&bs->vlc, 2);
-          }
-      } else if (picture->frame_pred_frame_dct) {
-          macroblock_modes |= MC_FRAME;
-      } else if (!(macroblock_modes & MACROBLOCK_INTRA)) {
-          macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE;
-          vl_vlc_dumpbits(&bs->vlc, 2);
-      }
-      return macroblock_modes;
+/* coding table as found in the spec annex B.5 table B-11 */
+static const struct vl_vlc_compressed dmvector[] = {
+   { 0x0000, { 1, 0 } },
+   { 0x8000, { 2, 1 } },
+   { 0xc000, { 2, -1 } }
+};
 
-   case D_TYPE:
+/* coding table as found in the spec annex B.5 table B-12 */
+static const struct vl_vlc_compressed dct_dc_size_luminance[] = {
+   { 0x8000, { 3, 0 } },
+   { 0x0000, { 2, 1 } },
+   { 0x4000, { 2, 2 } },
+   { 0xA000, { 3, 3 } },
+   { 0xC000, { 3, 4 } },
+   { 0xE000, { 4, 5 } },
+   { 0xF000, { 5, 6 } },
+   { 0xF800, { 6, 7 } },
+   { 0xFC00, { 7, 8 } },
+   { 0xFE00, { 8, 9 } },
+   { 0xFF00, { 9, 10 } },
+   { 0xFF80, { 9, 11 } }
+};
 
-      vl_vlc_dumpbits(&bs->vlc, 1);
-      return MACROBLOCK_INTRA;
+/* coding table as found in the spec annex B.5 table B-13 */
+static const struct vl_vlc_compressed dct_dc_size_chrominance[] = {
+   { 0x0000, { 2, 0 } },
+   { 0x4000, { 2, 1 } },
+   { 0x8000, { 2, 2 } },
+   { 0xC000, { 3, 3 } },
+   { 0xE000, { 4, 4 } },
+   { 0xF000, { 5, 5 } },
+   { 0xF800, { 6, 6 } },
+   { 0xFC00, { 7, 7 } },
+   { 0xFE00, { 8, 8 } },
+   { 0xFF00, { 9, 9 } },
+   { 0xFF80, { 10, 10 } },
+   { 0xFFC0, { 10, 11 } }
+};
 
-   default:
-      return 0;
-   }
-}
+/* coding table as found in the spec annex B.5 table B-14 */
+static const struct dct_coeff_compressed dct_coeff_tbl_zero[] = {
+   { 0x8000, { 2, dct_End_of_Block, 0 } },
+   { 0x8000, { 1, dct_DC, 1 } },
+   { 0xC000, { 2, dct_AC, 1 } },
+   { 0x6000, { 3, 1, 1 } },
+   { 0x4000, { 4, 0, 2 } },
+   { 0x5000, { 4, 2, 1 } },
+   { 0x2800, { 5, 0, 3 } },
+   { 0x3800, { 5, 3, 1 } },
+   { 0x3000, { 5, 4, 1 } },
+   { 0x1800, { 6, 1, 2 } },
+   { 0x1C00, { 6, 5, 1 } },
+   { 0x1400, { 6, 6, 1 } },
+   { 0x1000, { 6, 7, 1 } },
+   { 0x0C00, { 7, 0, 4 } },
+   { 0x0800, { 7, 2, 2 } },
+   { 0x0E00, { 7, 8, 1 } },
+   { 0x0A00, { 7, 9, 1 } },
+   { 0x0400, { 6, dct_Escape, 0 } },
+   { 0x2600, { 8, 0, 5 } },
+   { 0x2100, { 8, 0, 6 } },
+   { 0x2500, { 8, 1, 3 } },
+   { 0x2400, { 8, 3, 2 } },
+   { 0x2700, { 8, 10, 1 } },
+   { 0x2300, { 8, 11, 1 } },
+   { 0x2200, { 8, 12, 1 } },
+   { 0x2000, { 8, 13, 1 } },
+   { 0x0280, { 10, 0, 7 } },
+   { 0x0300, { 10, 1, 4 } },
+   { 0x02C0, { 10, 2, 3 } },
+   { 0x03C0, { 10, 4, 2 } },
+   { 0x0240, { 10, 5, 2 } },
+   { 0x0380, { 10, 14, 1 } },
+   { 0x0340, { 10, 15, 1 } },
+   { 0x0200, { 10, 16, 1 } },
+   { 0x01D0, { 12, 0, 8 } },
+   { 0x0180, { 12, 0, 9 } },
+   { 0x0130, { 12, 0, 10 } },
+   { 0x0100, { 12, 0, 11 } },
+   { 0x01B0, { 12, 1, 5 } },
+   { 0x0140, { 12, 2, 4 } },
+   { 0x01C0, { 12, 3, 3 } },
+   { 0x0120, { 12, 4, 3 } },
+   { 0x01E0, { 12, 6, 2 } },
+   { 0x0150, { 12, 7, 2 } },
+   { 0x0110, { 12, 8, 2 } },
+   { 0x01F0, { 12, 17, 1 } },
+   { 0x01A0, { 12, 18, 1 } },
+   { 0x0190, { 12, 19, 1 } },
+   { 0x0170, { 12, 20, 1 } },
+   { 0x0160, { 12, 21, 1 } },
+   { 0x00D0, { 13, 0, 12 } },
+   { 0x00C8, { 13, 0, 13 } },
+   { 0x00C0, { 13, 0, 14 } },
+   { 0x00B8, { 13, 0, 15 } },
+   { 0x00B0, { 13, 1, 6 } },
+   { 0x00A8, { 13, 1, 7 } },
+   { 0x00A0, { 13, 2, 5 } },
+   { 0x0098, { 13, 3, 4 } },
+   { 0x0090, { 13, 5, 3 } },
+   { 0x0088, { 13, 9, 2 } },
+   { 0x0080, { 13, 10, 2 } },
+   { 0x00F8, { 13, 22, 1 } },
+   { 0x00F0, { 13, 23, 1 } },
+   { 0x00E8, { 13, 24, 1 } },
+   { 0x00E0, { 13, 25, 1 } },
+   { 0x00D8, { 13, 26, 1 } },
+   { 0x007C, { 14, 0, 16 } },
+   { 0x0078, { 14, 0, 17 } },
+   { 0x0074, { 14, 0, 18 } },
+   { 0x0070, { 14, 0, 19 } },
+   { 0x006C, { 14, 0, 20 } },
+   { 0x0068, { 14, 0, 21 } },
+   { 0x0064, { 14, 0, 22 } },
+   { 0x0060, { 14, 0, 23 } },
+   { 0x005C, { 14, 0, 24 } },
+   { 0x0058, { 14, 0, 25 } },
+   { 0x0054, { 14, 0, 26 } },
+   { 0x0050, { 14, 0, 27 } },
+   { 0x004C, { 14, 0, 28 } },
+   { 0x0048, { 14, 0, 29 } },
+   { 0x0044, { 14, 0, 30 } },
+   { 0x0040, { 14, 0, 31 } },
+   { 0x0030, { 15, 0, 32 } },
+   { 0x002E, { 15, 0, 33 } },
+   { 0x002C, { 15, 0, 34 } },
+   { 0x002A, { 15, 0, 35 } },
+   { 0x0028, { 15, 0, 36 } },
+   { 0x0026, { 15, 0, 37 } },
+   { 0x0024, { 15, 0, 38 } },
+   { 0x0022, { 15, 0, 39 } },
+   { 0x0020, { 15, 0, 40 } },
+   { 0x003E, { 15, 1, 8 } },
+   { 0x003C, { 15, 1, 9 } },
+   { 0x003A, { 15, 1, 10 } },
+   { 0x0038, { 15, 1, 11 } },
+   { 0x0036, { 15, 1, 12 } },
+   { 0x0034, { 15, 1, 13 } },
+   { 0x0032, { 15, 1, 14 } },
+   { 0x0013, { 16, 1, 15 } },
+   { 0x0012, { 16, 1, 16 } },
+   { 0x0011, { 16, 1, 17 } },
+   { 0x0010, { 16, 1, 18 } },
+   { 0x0014, { 16, 6, 3 } },
+   { 0x001A, { 16, 11, 2 } },
+   { 0x0019, { 16, 12, 2 } },
+   { 0x0018, { 16, 13, 2 } },
+   { 0x0017, { 16, 14, 2 } },
+   { 0x0016, { 16, 15, 2 } },
+   { 0x0015, { 16, 16, 2 } },
+   { 0x001F, { 16, 27, 1 } },
+   { 0x001E, { 16, 28, 1 } },
+   { 0x001D, { 16, 29, 1 } },
+   { 0x001C, { 16, 30, 1 } },
+   { 0x001B, { 16, 31, 1 } }
+};
 
-static INLINE enum pipe_mpeg12_dct_type
-get_dct_type(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int macroblock_modes)
-{
-   enum pipe_mpeg12_dct_type dct_type = PIPE_MPEG12_DCT_TYPE_FRAME;
+/* coding table as found in the spec annex B.5 table B-15 */
+static const struct dct_coeff_compressed dct_coeff_tbl_one[] = {
+   { 0x6000, { 4, dct_End_of_Block, 0 } },
+   { 0x8000, { 2, 0, 1 } },
+   { 0x4000, { 3, 1, 1 } },
+   { 0xC000, { 3, 0, 2 } },
+   { 0x2800, { 5, 2, 1 } },
+   { 0x7000, { 4, 0, 3 } },
+   { 0x3800, { 5, 3, 1 } },
+   { 0x1800, { 6, 4, 1 } },
+   { 0x3000, { 5, 1, 2 } },
+   { 0x1C00, { 6, 5, 1 } },
+   { 0x0C00, { 7, 6, 1 } },
+   { 0x0800, { 7, 7, 1 } },
+   { 0xE000, { 5, 0, 4 } },
+   { 0x0E00, { 7, 2, 2 } },
+   { 0x0A00, { 7, 8, 1 } },
+   { 0xF000, { 7, 9, 1 } },
+   { 0x0400, { 6, dct_Escape, 0 } },
+   { 0xE800, { 5, 0, 5 } },
+   { 0x1400, { 6, 0, 6 } },
+   { 0xF200, { 7, 1, 3 } },
+   { 0x2600, { 8, 3, 2 } },
+   { 0xF400, { 7, 10, 1 } },
+   { 0x2100, { 8, 11, 1 } },
+   { 0x2500, { 8, 12, 1 } },
+   { 0x2400, { 8, 13, 1 } },
+   { 0x1000, { 6, 0, 7 } },
+   { 0x2700, { 8, 1, 4 } },
+   { 0xFC00, { 8, 2, 3 } },
+   { 0xFD00, { 8, 4, 2 } },
+   { 0x0200, { 9, 5, 2 } },
+   { 0x0280, { 9, 14, 1 } },
+   { 0x0380, { 9, 15, 1 } },
+   { 0x0340, { 10, 16, 1 } },
+   { 0xF600, { 7, 0, 8 } },
+   { 0xF800, { 7, 0, 9 } },
+   { 0x2300, { 8, 0, 10 } },
+   { 0x2200, { 8, 0, 11 } },
+   { 0x2000, { 8, 1, 5 } },
+   { 0x0300, { 10, 2, 4 } },
+   { 0x01C0, { 12, 3, 3 } },
+   { 0x0120, { 12, 4, 3 } },
+   { 0x01E0, { 12, 6, 2 } },
+   { 0x0150, { 12, 7, 2 } },
+   { 0x0110, { 12, 8, 2 } },
+   { 0x01F0, { 12, 17, 1 } },
+   { 0x01A0, { 12, 18, 1 } },
+   { 0x0190, { 12, 19, 1 } },
+   { 0x0170, { 12, 20, 1 } },
+   { 0x0160, { 12, 21, 1 } },
+   { 0xFA00, { 8, 0, 12 } },
+   { 0xFB00, { 8, 0, 13 } },
+   { 0xFE00, { 8, 0, 14 } },
+   { 0xFF00, { 8, 0, 15 } },
+   { 0x00B0, { 13, 1, 6 } },
+   { 0x00A8, { 13, 1, 7 } },
+   { 0x00A0, { 13, 2, 5 } },
+   { 0x0098, { 13, 3, 4 } },
+   { 0x0090, { 13, 5, 3 } },
+   { 0x0088, { 13, 9, 2 } },
+   { 0x0080, { 13, 10, 2 } },
+   { 0x00F8, { 13, 22, 1 } },
+   { 0x00F0, { 13, 23, 1 } },
+   { 0x00E8, { 13, 24, 1 } },
+   { 0x00E0, { 13, 25, 1 } },
+   { 0x00D8, { 13, 26, 1 } },
+   { 0x007C, { 14, 0, 16 } },
+   { 0x0078, { 14, 0, 17 } },
+   { 0x0074, { 14, 0, 18 } },
+   { 0x0070, { 14, 0, 19 } },
+   { 0x006C, { 14, 0, 20 } },
+   { 0x0068, { 14, 0, 21 } },
+   { 0x0064, { 14, 0, 22 } },
+   { 0x0060, { 14, 0, 23 } },
+   { 0x005C, { 14, 0, 24 } },
+   { 0x0058, { 14, 0, 25 } },
+   { 0x0054, { 14, 0, 26 } },
+   { 0x0050, { 14, 0, 27 } },
+   { 0x004C, { 14, 0, 28 } },
+   { 0x0048, { 14, 0, 29 } },
+   { 0x0044, { 14, 0, 30 } },
+   { 0x0040, { 14, 0, 31 } },
+   { 0x0030, { 15, 0, 32 } },
+   { 0x002E, { 15, 0, 33 } },
+   { 0x002C, { 15, 0, 34 } },
+   { 0x002A, { 15, 0, 35 } },
+   { 0x0028, { 15, 0, 36 } },
+   { 0x0026, { 15, 0, 37 } },
+   { 0x0024, { 15, 0, 38 } },
+   { 0x0022, { 15, 0, 39 } },
+   { 0x0020, { 15, 0, 40 } },
+   { 0x003E, { 15, 1, 8 } },
+   { 0x003C, { 15, 1, 9 } },
+   { 0x003A, { 15, 1, 10 } },
+   { 0x0038, { 15, 1, 11 } },
+   { 0x0036, { 15, 1, 12 } },
+   { 0x0034, { 15, 1, 13 } },
+   { 0x0032, { 15, 1, 14 } },
+   { 0x0013, { 16, 1, 15 } },
+   { 0x0012, { 16, 1, 16 } },
+   { 0x0011, { 16, 1, 17 } },
+   { 0x0010, { 16, 1, 18 } },
+   { 0x0014, { 16, 6, 3 } },
+   { 0x001A, { 16, 11, 2 } },
+   { 0x0019, { 16, 12, 2 } },
+   { 0x0018, { 16, 13, 2 } },
+   { 0x0017, { 16, 14, 2 } },
+   { 0x0016, { 16, 15, 2 } },
+   { 0x0015, { 16, 16, 2 } },
+   { 0x001F, { 16, 27, 1 } },
+   { 0x001E, { 16, 28, 1 } },
+   { 0x001D, { 16, 29, 1 } },
+   { 0x001C, { 16, 30, 1 } },
+   { 0x001B, { 16, 31, 1 } }
+};
 
-   if ((picture->picture_structure == FRAME_PICTURE) &&
-       (!picture->frame_pred_frame_dct) &&
-       (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))) {
+/* q_scale_type */
+static const unsigned quant_scale[2][32] = {
+  { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30,
+    32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62 },
+  { 0, 1, 2, 3, 4,  5,  6,  7,  8, 10, 12, 14, 16, 18, 20, 22, 24,
+    28, 32, 36, 40, 44, 48, 52, 56, 64, 72, 80, 88, 96, 104, 112 }
+};
 
-      dct_type = vl_vlc_ubits(&bs->vlc, 1) ? PIPE_MPEG12_DCT_TYPE_FIELD : PIPE_MPEG12_DCT_TYPE_FRAME;
-      vl_vlc_dumpbits(&bs->vlc, 1);
-   }
-   return dct_type;
-}
-
-static INLINE int
-get_quantizer_scale(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture)
-{
-   int quantizer_scale_code;
-
-   quantizer_scale_code = vl_vlc_ubits(&bs->vlc, 5);
-   vl_vlc_dumpbits(&bs->vlc, 5);
-
-   if (picture->q_scale_type)
-      return non_linear_quantizer_scale[quantizer_scale_code];
-   else
-      return quantizer_scale_code << 1;
-}
-
-static INLINE int
-get_motion_delta(struct vl_mpg12_bs *bs, unsigned f_code)
-{
-   int delta;
-   int sign;
-   const MVtab * tab;
-
-   if (bs->vlc.buf & 0x80000000) {
-      vl_vlc_dumpbits(&bs->vlc, 1);
-      return 0;
-   } else if (bs->vlc.buf >= 0x0c000000) {
-
-      tab = MV_4 + vl_vlc_ubits(&bs->vlc, 4);
-      delta = (tab->delta << f_code) + 1;
-      bs->vlc.bits += tab->len + f_code + 1;
-      bs->vlc.buf <<= tab->len;
-
-      sign = vl_vlc_sbits(&bs->vlc, 1);
-      bs->vlc.buf <<= 1;
-
-      if (f_code)
-         delta += vl_vlc_ubits(&bs->vlc, f_code);
-      bs->vlc.buf <<= f_code;
-
-      return (delta ^ sign) - sign;
-
-   } else {
-
-      tab = MV_10 + vl_vlc_ubits(&bs->vlc, 10);
-      delta = (tab->delta << f_code) + 1;
-      bs->vlc.bits += tab->len + 1;
-      bs->vlc.buf <<= tab->len;
-
-      sign = vl_vlc_sbits(&bs->vlc, 1);
-      bs->vlc.buf <<= 1;
-
-      if (f_code) {
-         vl_vlc_needbits(&bs->vlc);
-         delta += vl_vlc_ubits(&bs->vlc, f_code);
-         vl_vlc_dumpbits(&bs->vlc, f_code);
-      }
-
-      return (delta ^ sign) - sign;
-   }
-}
-
-static INLINE int
-bound_motion_vector(int vec, unsigned f_code)
-{
-#if 1
-   unsigned int limit;
-   int sign;
-
-   limit = 16 << f_code;
-
-   if ((unsigned int)(vec + limit) < 2 * limit)
-      return vec;
-   else {
-      sign = ((int32_t)vec) >> 31;
-      return vec - ((2 * limit) ^ sign) + sign;
-   }
-#else
-   return ((int32_t)vec << (28 - f_code)) >> (28 - f_code);
-#endif
-}
-
-static INLINE int
-get_dmv(struct vl_mpg12_bs *bs)
-{
-   const DMVtab * tab;
-
-   tab = DMV_2 + vl_vlc_ubits(&bs->vlc, 2);
-   vl_vlc_dumpbits(&bs->vlc, tab->len);
-   return tab->dmv;
-}
-
-static INLINE int
-get_coded_block_pattern(struct vl_mpg12_bs *bs)
-{
-   const CBPtab * tab;
-
-   vl_vlc_needbits(&bs->vlc);
-
-   if (bs->vlc.buf >= 0x20000000) {
-
-      tab = CBP_7 + (vl_vlc_ubits(&bs->vlc, 7) - 16);
-      vl_vlc_dumpbits(&bs->vlc, tab->len);
-      return tab->cbp;
-
-   } else {
-
-      tab = CBP_9 + vl_vlc_ubits(&bs->vlc, 9);
-      vl_vlc_dumpbits(&bs->vlc, tab->len);
-      return tab->cbp;
-   }
-}
-
-static INLINE int
-get_luma_dc_dct_diff(struct vl_mpg12_bs *bs)
-{
-   const DCtab * tab;
-   int size;
-   int dc_diff;
-
-   if (bs->vlc.buf < 0xf8000000) {
-      tab = DC_lum_5 + vl_vlc_ubits(&bs->vlc, 5);
-      size = tab->size;
-      if (size) {
-         bs->vlc.bits += tab->len + size;
-         bs->vlc.buf <<= tab->len;
-         dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size);
-         bs->vlc.buf <<= size;
-         return dc_diff;
-      } else {
-         vl_vlc_dumpbits(&bs->vlc, 3);
-         return 0;
-      }
-   } else {
-      tab = DC_long + (vl_vlc_ubits(&bs->vlc, 9) - 0x1e0);
-      size = tab->size;
-      vl_vlc_dumpbits(&bs->vlc, tab->len);
-      vl_vlc_needbits(&bs->vlc);
-      dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size);
-      vl_vlc_dumpbits(&bs->vlc, size);
-      return dc_diff;
-   }
-}
-
-static INLINE int
-get_chroma_dc_dct_diff(struct vl_mpg12_bs *bs)
-{
-   const DCtab * tab;
-   int size;
-   int dc_diff;
-
-   if (bs->vlc.buf < 0xf8000000) {
-      tab = DC_chrom_5 + vl_vlc_ubits(&bs->vlc, 5);
-      size = tab->size;
-      if (size) {
-         bs->vlc.bits += tab->len + size;
-         bs->vlc.buf <<= tab->len;
-         dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size);
-         bs->vlc.buf <<= size;
-         return dc_diff;
-      } else {
-         vl_vlc_dumpbits(&bs->vlc, 2);
-         return 0;
-      }
-   } else {
-      tab = DC_long + (vl_vlc_ubits(&bs->vlc, 10) - 0x3e0);
-      size = tab->size;
-      vl_vlc_dumpbits(&bs->vlc, tab->len + 1);
-      vl_vlc_needbits(&bs->vlc);
-      dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size);
-      vl_vlc_dumpbits(&bs->vlc, size);
-      return dc_diff;
-   }
-}
+static struct vl_vlc_entry tbl_B1[1 << 11];
+static struct vl_vlc_entry tbl_B2[1 << 2];
+static struct vl_vlc_entry tbl_B3[1 << 6];
+static struct vl_vlc_entry tbl_B4[1 << 6];
+static struct vl_vlc_entry tbl_B9[1 << 9];
+static struct vl_vlc_entry tbl_B10[1 << 11];
+static struct vl_vlc_entry tbl_B11[1 << 2];
+static struct vl_vlc_entry tbl_B12[1 << 10];
+static struct vl_vlc_entry tbl_B13[1 << 10];
+static struct dct_coeff tbl_B14_DC[1 << 17];
+static struct dct_coeff tbl_B14_AC[1 << 17];
+static struct dct_coeff tbl_B15[1 << 17];
 
 static INLINE void
-get_intra_block_B14(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
+init_dct_coeff_table(struct dct_coeff *dst, const struct dct_coeff_compressed *src,
+                     unsigned size, bool is_DC)
 {
-   int i, val;
-   const DCTtab *tab;
+   unsigned i;
 
-   i = 0;
-
-   vl_vlc_needbits(&bs->vlc);
-
-   while (1) {
-      if (bs->vlc.buf >= 0x28000000) {
-
-         tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
-
-         i += tab->run;
-         if (i >= 64)
-            break;	/* end of block */
-
-      normal_code:
-         bs->vlc.buf <<= tab->len;
-         bs->vlc.bits += tab->len + 1;
-         val = tab->level * quantizer_scale;
-
-         val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
-
-         dest[i] = val;
-
-         bs->vlc.buf <<= 1;
-         vl_vlc_needbits(&bs->vlc);
-
-         continue;
-
-      } else if (bs->vlc.buf >= 0x04000000) {
-
-         tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4);
-
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-
-         /* escape code */
-
-         i += UBITS(bs->vlc.buf << 6, 6) - 64;
-         if (i >= 64)
-            break;	/* illegal, check needed to avoid buffer overflow */
-
-         vl_vlc_dumpbits(&bs->vlc, 12);
-         vl_vlc_needbits(&bs->vlc);
-         val = vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale;
-
-         dest[i] = val;
-
-         vl_vlc_dumpbits(&bs->vlc, 12);
-         vl_vlc_needbits(&bs->vlc);
-
-         continue;
-
-      } else if (bs->vlc.buf >= 0x02000000) {
-         tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else if (bs->vlc.buf >= 0x00800000) {
-         tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else if (bs->vlc.buf >= 0x00200000) {
-         tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else {
-         tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16);
-         bs->vlc.buf <<= 16;
-         vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      }
-      break;	/* illegal, check needed to avoid buffer overflow */
+   for (i=0;i<(1<<17);++i) {
+      dst[i].length = 0;
+      dst[i].level = 0;
+      dst[i].run = dct_End_of_Block;
    }
 
-   vl_vlc_dumpbits(&bs->vlc, 2);	/* dump end of block code */
-}
-
-static INLINE void
-get_intra_block_B15(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
-{
-   int i, val;
-   const DCTtab * tab;
-
-   i = 0;
-
-   vl_vlc_needbits(&bs->vlc);
-
-   while (1) {
-      if (bs->vlc.buf >= 0x04000000) {
-
-         tab = DCT_B15_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4);
-
-         i += tab->run;
-         if (i < 64) {
-
-         normal_code:
-            bs->vlc.buf <<= tab->len;
-            bs->vlc.bits += tab->len + 1;
-            val = tab->level * quantizer_scale;
-
-            val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
-
-            dest[i] = val;
-
-            bs->vlc.buf <<= 1;
-            vl_vlc_needbits(&bs->vlc);
+   for(; size > 0; --size, ++src) {
+      struct dct_coeff coeff = src->coeff;
+      bool has_sign = true;
 
+      switch (coeff.run) {
+      case dct_End_of_Block:
+         if (is_DC)
             continue;
 
-         } else {
+         has_sign = false;
+         break;
 
-            /* end of block. I commented out this code because if we */
-            /* dont exit here we will still exit at the later test :) */
-
-            /* if (i >= 128) break;	*/	/* end of block */
-
-            /* escape code */
-
-            i += UBITS(bs->vlc.buf << 6, 6) - 64;
-            if (i >= 64)
-                break;	/* illegal, check against buffer overflow */
-
-            vl_vlc_dumpbits(&bs->vlc, 12);
-            vl_vlc_needbits(&bs->vlc);
-            val = vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale;
-
-            dest[i] = val;
-
-            vl_vlc_dumpbits(&bs->vlc, 12);
-            vl_vlc_needbits(&bs->vlc);
+      case dct_Escape:
+         has_sign = false;
+         break;
 
+      case dct_DC:
+         if (!is_DC)
             continue;
 
-          }
-      } else if (bs->vlc.buf >= 0x02000000) {
-         tab = DCT_B15_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else if (bs->vlc.buf >= 0x00800000) {
-         tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else if (bs->vlc.buf >= 0x00200000) {
-         tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else {
-         tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16);
-         bs->vlc.buf <<= 16;
-         vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      }
-      break;	/* illegal, check needed to avoid buffer overflow */
-   }
+         coeff.length += 1;
+         coeff.run = 1;
+         break;
 
-   vl_vlc_dumpbits(&bs->vlc, 4);	/* dump end of block code */
-}
+      case dct_AC:
+         if (is_DC)
+            continue;
 
-static INLINE void
-get_non_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
-{
-   int i, val;
-   const DCTtab *tab;
-
-   i = -1;
-
-   vl_vlc_needbits(&bs->vlc);
-   if (bs->vlc.buf >= 0x28000000) {
-      tab = DCT_B14DC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
-      goto entry_1;
-   } else
-      goto entry_2;
-
-   while (1) {
-      if (bs->vlc.buf >= 0x28000000) {
-
-         tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
-
-      entry_1:
-         i += tab->run;
-         if (i >= 64)
-            break;	/* end of block */
-
-      normal_code:
-         bs->vlc.buf <<= tab->len;
-         bs->vlc.bits += tab->len + 1;
-         val = ((2*tab->level+1) * quantizer_scale) >> 1;
-
-         val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
-
-         dest[i] = val;
-
-         bs->vlc.buf <<= 1;
-         vl_vlc_needbits(&bs->vlc);
-
-         continue;
-
-      }
-
-   entry_2:
-      if (bs->vlc.buf >= 0x04000000) {
-
-         tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4);
-
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-
-         /* escape code */
-
-         i += UBITS(bs->vlc.buf << 6, 6) - 64;
-         if (i >= 64)
-            break;	/* illegal, check needed to avoid buffer overflow */
-
-         vl_vlc_dumpbits(&bs->vlc, 12);
-         vl_vlc_needbits(&bs->vlc);
-         val = 2 * (vl_vlc_sbits(&bs->vlc, 12) + vl_vlc_sbits(&bs->vlc, 1)) + 1;
-         val = (val * quantizer_scale) / 2;
-
-         dest[i] = val;
-
-         vl_vlc_dumpbits(&bs->vlc, 12);
-         vl_vlc_needbits(&bs->vlc);
-
-         continue;
-
-      } else if (bs->vlc.buf >= 0x02000000) {
-         tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else if (bs->vlc.buf >= 0x00800000) {
-         tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else if (bs->vlc.buf >= 0x00200000) {
-         tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else {
-         tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16);
-         bs->vlc.buf <<= 16;
-         vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      }
-      break;	/* illegal, check needed to avoid buffer overflow */
-   }
-   vl_vlc_dumpbits(&bs->vlc, 2);	/* dump end of block code */
-}
-
-static INLINE void
-get_mpeg1_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
-{
-   int i, val;
-   const DCTtab * tab;
-
-   i = 0;
-
-   vl_vlc_needbits(&bs->vlc);
-
-   while (1) {
-      if (bs->vlc.buf >= 0x28000000) {
-
-         tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
-
-         i += tab->run;
-         if (i >= 64)
-            break;	/* end of block */
-
-      normal_code:
-         bs->vlc.buf <<= tab->len;
-         bs->vlc.bits += tab->len + 1;
-         val = tab->level * quantizer_scale;
-
-         /* oddification */
-         val = (val - 1) | 1;
-
-         /* if (bitstream_get (1)) val = -val; */
-         val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
-
-         dest[i] = val;
-
-         bs->vlc.buf <<= 1;
-         vl_vlc_needbits(&bs->vlc);
-
-         continue;
-
-      } else if (bs->vlc.buf >= 0x04000000) {
-
-         tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4);
-
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-
-         /* escape code */
-
-         i += UBITS(bs->vlc.buf << 6, 6) - 64;
-         if (i >= 64)
-            break;	/* illegal, check needed to avoid buffer overflow */
-
-         vl_vlc_dumpbits(&bs->vlc, 12);
-         vl_vlc_needbits(&bs->vlc);
-         val = vl_vlc_sbits(&bs->vlc, 8);
-         if (! (val & 0x7f)) {
-            vl_vlc_dumpbits(&bs->vlc, 8);
-            val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val;
-         }
-         val = val * quantizer_scale;
-
-         /* oddification */
-         val = (val + ~SBITS (val, 1)) | 1;
-
-         dest[i] = val;
-
-         vl_vlc_dumpbits(&bs->vlc, 8);
-         vl_vlc_needbits(&bs->vlc);
-
-         continue;
-
-      } else if (bs->vlc.buf >= 0x02000000) {
-         tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else if (bs->vlc.buf >= 0x00800000) {
-         tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else if (bs->vlc.buf >= 0x00200000) {
-         tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else {
-         tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16);
-         bs->vlc.buf <<= 16;
-         vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      }
-      break;	/* illegal, check needed to avoid buffer overflow */
-   }
-   vl_vlc_dumpbits(&bs->vlc, 2);	/* dump end of block code */
-}
-
-static INLINE void
-get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
-{
-   int i, val;
-   const DCTtab * tab;
-
-   i = -1;
-
-   vl_vlc_needbits(&bs->vlc);
-   if (bs->vlc.buf >= 0x28000000) {
-      tab = DCT_B14DC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
-      goto entry_1;
-   } else
-      goto entry_2;
-
-   while (1) {
-      if (bs->vlc.buf >= 0x28000000) {
-
-         tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
-
-      entry_1:
-         i += tab->run;
-         if (i >= 64)
-            break;	/* end of block */
-
-      normal_code:
-         bs->vlc.buf <<= tab->len;
-         bs->vlc.bits += tab->len + 1;
-         val = ((2*tab->level+1) * quantizer_scale) >> 1;
-
-         /* oddification */
-         val = (val - 1) | 1;
-
-         /* if (bitstream_get (1)) val = -val; */
-         val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
-
-         dest[i] = val;
-
-         bs->vlc.buf <<= 1;
-         vl_vlc_needbits(&bs->vlc);
-
-         continue;
-
-      }
-
-   entry_2:
-      if (bs->vlc.buf >= 0x04000000) {
-
-         tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4);
-
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-
-         /* escape code */
-
-         i += UBITS(bs->vlc.buf << 6, 6) - 64;
-         if (i >= 64)
-            break;	/* illegal, check needed to avoid buffer overflow */
-
-         vl_vlc_dumpbits(&bs->vlc, 12);
-         vl_vlc_needbits(&bs->vlc);
-         val = vl_vlc_sbits(&bs->vlc, 8);
-         if (! (val & 0x7f)) {
-            vl_vlc_dumpbits(&bs->vlc, 8);
-            val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val;
-         }
-         val = 2 * (val + SBITS (val, 1)) + 1;
-         val = (val * quantizer_scale) / 2;
-
-         /* oddification */
-         val = (val + ~SBITS (val, 1)) | 1;
-
-         dest[i] = val;
-
-         vl_vlc_dumpbits(&bs->vlc, 8);
-         vl_vlc_needbits(&bs->vlc);
-
-         continue;
-
-      } else if (bs->vlc.buf >= 0x02000000) {
-         tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else if (bs->vlc.buf >= 0x00800000) {
-         tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else if (bs->vlc.buf >= 0x00200000) {
-         tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      } else {
-         tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16);
-         bs->vlc.buf <<= 16;
-         vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16);
-         i += tab->run;
-         if (i < 64)
-            goto normal_code;
-      }
-      break;	/* illegal, check needed to avoid buffer overflow */
-   }
-   vl_vlc_dumpbits(&bs->vlc, 2);	/* dump end of block code */
-}
-
-static INLINE void
-slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc,
-                 unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int quantizer_scale, int dc_dct_pred[3])
-{
-   short dest[64];
-
-   bs->ycbcr_stream[cc]->x = x;
-   bs->ycbcr_stream[cc]->y = y;
-   bs->ycbcr_stream[cc]->intra = 1;
-   bs->ycbcr_stream[cc]->coding = coding;
-   bs->ycbcr_stream[cc]->block_num = bs->block_num++;
-
-   vl_vlc_needbits(&bs->vlc);
-
-   /* Get the intra DC coefficient and inverse quantize it */
-   if (cc == 0)
-      dc_dct_pred[0] += get_luma_dc_dct_diff(bs);
-   else
-      dc_dct_pred[cc] += get_chroma_dc_dct_diff(bs);
-
-   memset(dest, 0, sizeof(int16_t) * 64);
-   dest[0] = dc_dct_pred[cc];
-   if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1) {
-      if (picture->picture_coding_type != D_TYPE)
-          get_mpeg1_intra_block(bs, quantizer_scale, dest);
-   } else if (picture->intra_vlc_format)
-      get_intra_block_B15(bs, quantizer_scale, dest);
-   else
-      get_intra_block_B14(bs, quantizer_scale, dest);
-
-   memcpy(bs->ycbcr_buffer, dest, sizeof(int16_t) * 64);
-
-   bs->num_ycbcr_blocks[cc]++;
-   bs->ycbcr_stream[cc]++;
-   bs->ycbcr_buffer += 64;
-}
-
-static INLINE void
-slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc,
-                    unsigned x, unsigned y,  enum pipe_mpeg12_dct_type coding, int quantizer_scale)
-{
-   short dest[64];
-
-   bs->ycbcr_stream[cc]->x = x;
-   bs->ycbcr_stream[cc]->y = y;
-   bs->ycbcr_stream[cc]->intra = 0;
-   bs->ycbcr_stream[cc]->coding = coding;
-   bs->ycbcr_stream[cc]->block_num = bs->block_num++;
-
-   memset(dest, 0, sizeof(int16_t) * 64);
-   if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1)
-      get_mpeg1_non_intra_block(bs, quantizer_scale, dest);
-   else
-      get_non_intra_block(bs, quantizer_scale, dest);
-
-   memcpy(bs->ycbcr_buffer, dest, sizeof(int16_t) * 64);
-
-   bs->num_ycbcr_blocks[cc]++;
-   bs->ycbcr_stream[cc]++;
-   bs->ycbcr_buffer += 64;
-}
-
-static INLINE void
-motion_mp1(struct vl_mpg12_bs *bs, unsigned f_code[2], struct vl_motionvector *mv)
-{
-   int motion_x, motion_y;
-
-   mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_x = (mv->top.x + (get_motion_delta(bs, f_code[0]) << f_code[1]));
-   motion_x = bound_motion_vector (motion_x, f_code[0] + f_code[1]);
-   mv->top.x = mv->bottom.x = motion_x;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_y = (mv->top.y + (get_motion_delta(bs, f_code[0]) << f_code[1]));
-   motion_y = bound_motion_vector (motion_y, f_code[0] + f_code[1]);
-   mv->top.y = mv->bottom.y = motion_y;
-}
-
-static INLINE void
-motion_fr_frame(struct vl_mpg12_bs *bs, unsigned f_code[2], struct vl_motionvector *mv)
-{
-   int motion_x, motion_y;
-
-   mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_x = mv->top.x + get_motion_delta(bs, f_code[0]);
-   motion_x = bound_motion_vector(motion_x, f_code[0]);
-   mv->top.x = mv->bottom.x = motion_x;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_y = mv->top.y + get_motion_delta(bs, f_code[1]);
-   motion_y = bound_motion_vector(motion_y, f_code[1]);
-   mv->top.y = mv->bottom.y = motion_y;
-}
-
-static INLINE void
-motion_fr_field(struct vl_mpg12_bs *bs, unsigned f_code[2], struct vl_motionvector *mv)
-{
-   int motion_x, motion_y;
-
-   vl_vlc_needbits(&bs->vlc);
-   mv->top.field_select = vl_vlc_ubits(&bs->vlc, 1) ?
-      PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD;
-   vl_vlc_dumpbits(&bs->vlc, 1);
-
-   motion_x = mv->top.x + get_motion_delta(bs, f_code[0]);
-   motion_x = bound_motion_vector (motion_x, f_code[0]);
-   mv->top.x = motion_x;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_y = (mv->top.y >> 1) + get_motion_delta(bs, f_code[1]);
-   /* motion_y = bound_motion_vector (motion_y, f_code[1]); */
-   mv->top.y = motion_y << 1;
-
-   vl_vlc_needbits(&bs->vlc);
-   mv->bottom.field_select = vl_vlc_ubits(&bs->vlc, 1) ?
-      PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD;
-   vl_vlc_dumpbits(&bs->vlc, 1);
-
-   motion_x = mv->bottom.x + get_motion_delta(bs, f_code[0]);
-   motion_x = bound_motion_vector (motion_x, f_code[0]);
-   mv->bottom.x = motion_x;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_y = (mv->bottom.y >> 1) + get_motion_delta(bs, f_code[1]);
-   /* motion_y = bound_motion_vector (motion_y, f_code[1]); */
-   mv->bottom.y = motion_y << 1;
-}
-
-static INLINE void
-motion_fr_dmv(struct vl_mpg12_bs *bs, unsigned f_code[2], struct vl_motionvector *mv)
-{
-   int motion_x, motion_y;
-
-   // TODO Implement dmv
-   mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_x = mv->top.x + get_motion_delta(bs, f_code[0]);
-   motion_x = bound_motion_vector(motion_x, f_code[0]);
-   mv->top.x = mv->bottom.x = motion_x;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_y = (mv->top.y >> 1) + get_motion_delta(bs, f_code[1]);
-   /* motion_y = bound_motion_vector (motion_y, f_code[1]); */
-   mv->top.y = mv->bottom.y = motion_y << 1;
-}
-
-/* like motion_frame, but parsing without actual motion compensation */
-static INLINE void
-motion_fr_conceal(struct vl_mpg12_bs *bs, unsigned f_code[2], struct vl_motionvector *mv)
-{
-   int tmp;
-
-   mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME;
-
-   vl_vlc_needbits(&bs->vlc);
-   tmp = (mv->top.x + get_motion_delta(bs, f_code[0]));
-   tmp = bound_motion_vector (tmp, f_code[0]);
-   mv->top.x = mv->bottom.x = tmp;
-
-   vl_vlc_needbits(&bs->vlc);
-   tmp = (mv->top.y + get_motion_delta(bs, f_code[1]));
-   tmp = bound_motion_vector (tmp, f_code[1]);
-   mv->top.y = mv->bottom.y = tmp;
-
-   vl_vlc_dumpbits(&bs->vlc, 1); /* remove marker_bit */
-}
-
-static INLINE void
-motion_fi_field(struct vl_mpg12_bs *bs, unsigned f_code[2], struct vl_motionvector *mv)
-{
-   int motion_x, motion_y;
-
-   vl_vlc_needbits(&bs->vlc);
-
-   // ref_field
-   //vl_vlc_ubits(&bs->vlc, 1);
-
-   // TODO field select may need to do something here for bob (weave ok)
-   mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME;
-   vl_vlc_dumpbits(&bs->vlc, 1);
-
-   motion_x = mv->top.x + get_motion_delta(bs, f_code[0]);
-   motion_x = bound_motion_vector (motion_x, f_code[0]);
-   mv->top.x = mv->bottom.x = motion_x;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_y = mv->top.y + get_motion_delta(bs, f_code[1]);
-   motion_y = bound_motion_vector (motion_y, f_code[1]);
-   mv->top.y = mv->bottom.y = motion_y;
-}
-
-static INLINE void
-motion_fi_16x8(struct vl_mpg12_bs *bs, unsigned f_code[2], struct vl_motionvector *mv)
-{
-   int motion_x, motion_y;
-
-   vl_vlc_needbits(&bs->vlc);
-
-   // ref_field
-   //vl_vlc_ubits(&bs->vlc, 1);
-
-   // TODO field select may need to do something here bob  (weave ok)
-   mv->top.field_select = PIPE_VIDEO_FRAME;
-   vl_vlc_dumpbits(&bs->vlc, 1);
-
-   motion_x = mv->top.x + get_motion_delta(bs, f_code[0]);
-   motion_x = bound_motion_vector (motion_x, f_code[0]);
-   mv->top.x = motion_x;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_y = mv->top.y + get_motion_delta(bs, f_code[1]);
-   motion_y = bound_motion_vector (motion_y, f_code[1]);
-   mv->top.y = motion_y;
-
-   vl_vlc_needbits(&bs->vlc);
-   // ref_field
-   //vl_vlc_ubits(&bs->vlc, 1);
-
-   // TODO field select may need to do something here for bob (weave ok)
-   mv->bottom.field_select = PIPE_VIDEO_FRAME;
-   vl_vlc_dumpbits(&bs->vlc, 1);
-
-   motion_x = mv->bottom.x + get_motion_delta(bs, f_code[0]);
-   motion_x = bound_motion_vector (motion_x, f_code[0]);
-   mv->bottom.x = motion_x;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_y = mv->bottom.y + get_motion_delta(bs, f_code[1]);
-   motion_y = bound_motion_vector (motion_y, f_code[1]);
-   mv->bottom.y = motion_y;
-}
-
-static INLINE void
-motion_fi_dmv(struct vl_mpg12_bs *bs, unsigned f_code[2], struct vl_motionvector *mv)
-{
-   int motion_x, motion_y;
-
-   // TODO field select may need to do something here for bob  (weave ok)
-   mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_x = mv->top.x + get_motion_delta(bs, f_code[0]);
-   motion_x = bound_motion_vector (motion_x, f_code[0]);
-   mv->top.x = mv->bottom.x = motion_x;
-
-   vl_vlc_needbits(&bs->vlc);
-   motion_y = mv->top.y + get_motion_delta(bs, f_code[1]);
-   motion_y = bound_motion_vector (motion_y, f_code[1]);
-   mv->top.y = mv->bottom.y = motion_y;
-}
-
-
-static INLINE void
-motion_fi_conceal(struct vl_mpg12_bs *bs, unsigned f_code[2], struct vl_motionvector *mv)
-{
-   int tmp;
-
-   vl_vlc_needbits(&bs->vlc);
-   vl_vlc_dumpbits(&bs->vlc, 1); /* remove field_select */
-
-   tmp = (mv->top.x + get_motion_delta(bs, f_code[0]));
-   tmp = bound_motion_vector(tmp, f_code[0]);
-   mv->top.x = mv->bottom.x = tmp;
-
-   vl_vlc_needbits(&bs->vlc);
-   tmp = (mv->top.y + get_motion_delta(bs, f_code[1]));
-   tmp = bound_motion_vector(tmp, f_code[1]);
-   mv->top.y = mv->bottom.y = tmp;
-
-   vl_vlc_dumpbits(&bs->vlc, 1); /* remove marker_bit */
-}
-
-#define MOTION_CALL(routine, macroblock_modes)		\
-do {							\
-   if ((macroblock_modes) & MACROBLOCK_MOTION_FORWARD)  \
-      routine(bs, picture->f_code[0], &mv_fwd);         \
-   if ((macroblock_modes) & MACROBLOCK_MOTION_BACKWARD)	\
-      routine(bs, picture->f_code[1], &mv_bwd);         \
-} while (0)
-
-static INLINE void
-store_motionvectors(struct vl_mpg12_bs *bs, unsigned *mv_pos,
-                    struct vl_motionvector *mv_fwd,
-                    struct vl_motionvector *mv_bwd)
-{
-   bs->mv_stream[0][*mv_pos].top = mv_fwd->top;
-   bs->mv_stream[0][*mv_pos].bottom =
-      mv_fwd->top.field_select == PIPE_VIDEO_FRAME ?
-      mv_fwd->top : mv_fwd->bottom;
-
-   bs->mv_stream[1][*mv_pos].top = mv_bwd->top;
-   bs->mv_stream[1][*mv_pos].bottom =
-      mv_bwd->top.field_select == PIPE_VIDEO_FRAME ?
-      mv_bwd->top : mv_bwd->bottom;
-
-   (*mv_pos)++;
-}
-
-static INLINE bool
-slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
-           int *quantizer_scale, unsigned *x, unsigned *y, unsigned *mv_pos)
-{
-   const MBAtab * mba;
-
-   vl_vlc_need32bits(&bs->vlc);
-   while(bs->vlc.buf < 0x101 || bs->vlc.buf > 0x1AF) {
-      if(!vl_vlc_getbyte(&bs->vlc))
-         return false;
-   }
-   *y = (bs->vlc.buf & 0xFF) - 1;
-   vl_vlc_restart(&bs->vlc);
-
-   *quantizer_scale = get_quantizer_scale(bs, picture);
-
-   /* ignore intra_slice and all the extra data */
-   while (bs->vlc.buf & 0x80000000) {
-      vl_vlc_dumpbits(&bs->vlc, 9);
-      vl_vlc_needbits(&bs->vlc);
-   }
-
-   /* decode initial macroblock address increment */
-   *x = 0;
-   while (1) {
-      if (bs->vlc.buf >= 0x08000000) {
-          mba = MBA_5 + (vl_vlc_ubits(&bs->vlc, 6) - 2);
-          break;
-      } else if (bs->vlc.buf >= 0x01800000) {
-          mba = MBA_11 + (vl_vlc_ubits(&bs->vlc, 12) - 24);
-          break;
-      } else switch (vl_vlc_ubits(&bs->vlc, 12)) {
-      case 8:		/* macroblock_escape */
-          *x += 33;
-          vl_vlc_dumpbits(&bs->vlc, 11);
-          vl_vlc_needbits(&bs->vlc);
-          continue;
-      case 15:	/* macroblock_stuffing (MPEG1 only) */
-          bs->vlc.buf &= 0xfffff;
-          vl_vlc_dumpbits(&bs->vlc, 11);
-          vl_vlc_needbits(&bs->vlc);
-          continue;
-      default:	/* error */
-          return false;
-      }
-   }
-   vl_vlc_dumpbits(&bs->vlc, mba->len + 1);
-   *x += mba->mba;
-
-   while (*x >= bs->width) {
-      *x -= bs->width;
-      (*y)++;
-   }
-   if (*y > bs->height)
-      return false;
-
-   *mv_pos = *x + *y * bs->width;
-
-   return true;
-}
-
-static INLINE bool
-decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
-{
-   enum vl_field_select default_field_select;
-   struct vl_motionvector mv_fwd, mv_bwd;
-   enum pipe_mpeg12_dct_type dct_type;
-
-   /* predictor for DC coefficients in intra blocks */
-   int dc_dct_pred[3] = { 0, 0, 0 };
-   int quantizer_scale;
-
-   unsigned x, y, mv_pos;
-
-   switch(picture->picture_structure) {
-   case TOP_FIELD:
-      default_field_select = PIPE_VIDEO_TOP_FIELD;
-      break;
-
-   case BOTTOM_FIELD:
-      default_field_select = PIPE_VIDEO_BOTTOM_FIELD;
-      break;
-
-   default:
-      default_field_select = PIPE_VIDEO_FRAME;
-      break;
-   }
-
-   if (!slice_init(bs, picture, &quantizer_scale, &x, &y, &mv_pos))
-      return false;
-
-   mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
-   mv_fwd.top.field_select = mv_fwd.bottom.field_select = default_field_select;
-
-   mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0;
-   mv_bwd.top.field_select = mv_bwd.bottom.field_select = default_field_select;
-
-   while (1) {
-      int macroblock_modes;
-      int mba_inc;
-      const MBAtab * mba;
-
-      vl_vlc_needbits(&bs->vlc);
-
-      macroblock_modes = get_macroblock_modes(bs, picture);
-      dct_type = get_dct_type(bs, picture, macroblock_modes);
-
-      switch(macroblock_modes & (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD)) {
-      case (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD):
-         mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_HALF;
-         mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_HALF;
+         coeff.length += 1;
+         coeff.run = 1;
          break;
 
       default:
-         mv_fwd.top.field_select = mv_fwd.bottom.field_select = default_field_select;
-         mv_bwd.top.field_select = mv_bwd.bottom.field_select = default_field_select;
-
-         /* fall through */
-      case MACROBLOCK_MOTION_FORWARD:
-         mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
-         mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
-         break;
-
-      case MACROBLOCK_MOTION_BACKWARD:
-         mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
-         mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
+         coeff.length += 1;
+         coeff.run += 1;
          break;
       }
 
-      /* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */
-      if (macroblock_modes & MACROBLOCK_QUANT)
-         quantizer_scale = get_quantizer_scale(bs, picture);
+      for(i=0; i<(1 << (17 - coeff.length)); ++i)
+         dst[src->bitcode << 1 | i] = coeff;
 
-      if (macroblock_modes & MACROBLOCK_INTRA) {
-
-         if (picture->concealment_motion_vectors) {
-            if (picture->picture_structure == FRAME_PICTURE)
-               motion_fr_conceal(bs, picture->f_code[0], &mv_fwd);
-            else
-               motion_fi_conceal(bs, picture->f_code[0], &mv_fwd);
-
-         } else {
-            mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
-            mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0;
-         }
-         mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
-         mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
-
-         // unravaled loop of 6 block(i) calls in macroblock()
-         slice_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
-
-         if (picture->picture_coding_type == D_TYPE) {
-            vl_vlc_needbits(&bs->vlc);
-            vl_vlc_dumpbits(&bs->vlc, 1);
-         }
-
-      } else {
-         if (picture->picture_structure == FRAME_PICTURE)
-            switch (macroblock_modes & MOTION_TYPE_MASK) {
-            case MC_FRAME:
-               if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1) {
-                  MOTION_CALL(motion_mp1, macroblock_modes);
-               } else {
-                  MOTION_CALL(motion_fr_frame, macroblock_modes);
-               }
-               break;
-
-            case MC_FIELD:
-               MOTION_CALL (motion_fr_field, macroblock_modes);
-               break;
-
-            case MC_DMV:
-               MOTION_CALL (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD);
-               break;
-
-            case 0:
-               /* non-intra mb without forward mv in a P picture */
-               mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
-               mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0;
-               break;
-            }
-         else
-            switch (macroblock_modes & MOTION_TYPE_MASK) {
-            case MC_FIELD:
-               MOTION_CALL (motion_fi_field, macroblock_modes);
-               break;
-
-            case MC_16X8:
-               MOTION_CALL (motion_fi_16x8, macroblock_modes);
-               break;
-
-            case MC_DMV:
-               MOTION_CALL (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD);
-               break;
-
-            case 0:
-               /* non-intra mb without forward mv in a P picture */
-               mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
-               mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0;
-               break;
-            }
-
-         if (macroblock_modes & MACROBLOCK_PATTERN) {
-            int coded_block_pattern = get_coded_block_pattern(bs);
-
-            // TODO  optimize not fully used for idct accel only mc.
-            if (coded_block_pattern & 0x20)
-               slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale); // cc0  luma 0
-            if (coded_block_pattern & 0x10)
-               slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale); // cc0 luma 1
-            if (coded_block_pattern & 0x08)
-               slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale); // cc0 luma 2
-            if (coded_block_pattern & 0x04)
-               slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale); // cc0 luma 3
-            if (coded_block_pattern & 0x2)
-               slice_non_intra_DCT(bs, picture, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc1 croma
-            if (coded_block_pattern & 0x1)
-               slice_non_intra_DCT(bs, picture, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc2 croma
-         }
-
-         dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
-      }
-
-      store_motionvectors(bs, &mv_pos, &mv_fwd, &mv_bwd);
-      if (++x >= bs->width) {
-         ++y;
-         if (y >= bs->height)
-            return false;
-         x -= bs->width;
-      }
-
-      vl_vlc_needbits(&bs->vlc);
-      mba_inc = 0;
-      while (1) {
-         if (bs->vlc.buf >= 0x10000000) {
-            mba = MBA_5 + (vl_vlc_ubits(&bs->vlc, 5) - 2);
-            break;
-         } else if (bs->vlc.buf >= 0x03000000) {
-            mba = MBA_11 + (vl_vlc_ubits(&bs->vlc, 11) - 24);
-            break;
-         } else switch (vl_vlc_ubits(&bs->vlc, 11)) {
-         case 8:		/* macroblock_escape */
-            mba_inc += 33;
-            /* pass through */
-         case 15:	/* macroblock_stuffing (MPEG1 only) */
-            vl_vlc_dumpbits(&bs->vlc, 11);
-            vl_vlc_needbits(&bs->vlc);
-            continue;
-         default:	/* end of slice, or error */
-            return true;
-         }
-      }
-      vl_vlc_dumpbits(&bs->vlc, mba->len);
-      mba_inc += mba->mba;
-      if (mba_inc) {
-         //TODO  conversion to signed format signed format
-         dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
-
-         mv_fwd.top.field_select = mv_fwd.bottom.field_select = default_field_select;
-         mv_bwd.top.field_select = mv_bwd.bottom.field_select = default_field_select;
-
-         if (picture->picture_coding_type == P_TYPE) {
-            mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
-            mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
-         }
-
-         x += mba_inc;
-         do {
-            store_motionvectors(bs, &mv_pos, &mv_fwd, &mv_bwd);
-         } while (--mba_inc);
-      }
-      while (x >= bs->width) {
-         ++y;
-         if (y >= bs->height)
-            return false;
-         x -= bs->width;
+      if (has_sign) {
+	 coeff.level = -coeff.level;
+         for(; i<(1 << (18 - coeff.length)); ++i)
+            dst[src->bitcode << 1 | i] = coeff;
       }
    }
 }
 
-void
-vl_mpg12_bs_init(struct vl_mpg12_bs *bs, unsigned width, unsigned height)
+static INLINE void
+init_tables()
 {
+   vl_vlc_init_table(tbl_B1, Elements(tbl_B1), macroblock_address_increment, Elements(macroblock_address_increment));
+   vl_vlc_init_table(tbl_B2, Elements(tbl_B2), macroblock_type_i, Elements(macroblock_type_i));
+   vl_vlc_init_table(tbl_B3, Elements(tbl_B3), macroblock_type_p, Elements(macroblock_type_p));
+   vl_vlc_init_table(tbl_B4, Elements(tbl_B4), macroblock_type_b, Elements(macroblock_type_b));
+   vl_vlc_init_table(tbl_B9, Elements(tbl_B9), coded_block_pattern, Elements(coded_block_pattern));
+   vl_vlc_init_table(tbl_B10, Elements(tbl_B10), motion_code, Elements(motion_code));
+   vl_vlc_init_table(tbl_B11, Elements(tbl_B11), dmvector, Elements(dmvector));
+   vl_vlc_init_table(tbl_B12, Elements(tbl_B12), dct_dc_size_luminance, Elements(dct_dc_size_luminance));
+   vl_vlc_init_table(tbl_B13, Elements(tbl_B13), dct_dc_size_chrominance, Elements(dct_dc_size_chrominance));
+   init_dct_coeff_table(tbl_B14_DC, dct_coeff_tbl_zero, Elements(dct_coeff_tbl_zero), true);
+   init_dct_coeff_table(tbl_B14_AC, dct_coeff_tbl_zero, Elements(dct_coeff_tbl_zero), false);
+   init_dct_coeff_table(tbl_B15, dct_coeff_tbl_one, Elements(dct_coeff_tbl_one), false);
+}
+
+static INLINE int
+DIV2DOWN(int todiv)
+{
+   return (todiv&~1)/2;
+}
+
+static INLINE int
+DIV2UP(int todiv)
+{
+   return (todiv+1)/2;
+}
+
+static INLINE void
+motion_vector(struct vl_mpg12_bs *bs, int r, int s, int dmv, short delta[2], short dmvector[2])
+{
+   int t;
+   for (t = 0; t < 2; ++t) {
+      int motion_code;
+      int r_size = bs->desc.f_code[s][t];
+
+      vl_vlc_fillbits(&bs->vlc);
+      motion_code = vl_vlc_get_vlclbf(&bs->vlc, tbl_B10, 11);
+
+      assert(r_size >= 0);
+      if (r_size && motion_code) {
+         int residual = vl_vlc_get_uimsbf(&bs->vlc, r_size) + 1;
+         delta[t] = ((abs(motion_code) - 1) << r_size) + residual;
+         if (motion_code < 0)
+            delta[t] = -delta[t];
+      } else
+         delta[t] = motion_code;
+      if (dmv)
+         dmvector[t] = vl_vlc_get_vlclbf(&bs->vlc, tbl_B11, 2);
+   }
+}
+
+static INLINE int
+wrap(short f, int shift)
+{
+   if (f < (-16 << shift))
+      return f + (32 << shift);
+   else if (f >= 16 << shift)
+      return f - (32 << shift);
+   else
+      return f;
+}
+
+static INLINE void
+motion_vector_frame(struct vl_mpg12_bs *bs, int s, struct pipe_mpeg12_macroblock *mb)
+{
+   int dmv = mb->macroblock_modes.bits.frame_motion_type == PIPE_MPEG12_MO_TYPE_DUAL_PRIME;
+   short dmvector[2], delta[2];
+
+   if (mb->macroblock_modes.bits.frame_motion_type == PIPE_MPEG12_MO_TYPE_FIELD) {
+      mb->motion_vertical_field_select |= vl_vlc_get_uimsbf(&bs->vlc, 1) << s;
+      motion_vector(bs, 0, s, dmv, delta, dmvector);
+      mb->PMV[0][s][0] = wrap(mb->PMV[0][s][0] + delta[0], bs->desc.f_code[s][0]);
+      mb->PMV[0][s][1] = wrap(DIV2DOWN(mb->PMV[0][s][1]) + delta[1], bs->desc.f_code[s][1]) * 2;
+
+      mb->motion_vertical_field_select |= vl_vlc_get_uimsbf(&bs->vlc, 1) << (s + 2);
+      motion_vector(bs, 1, s, dmv, delta, dmvector);
+      mb->PMV[1][s][0] = wrap(mb->PMV[1][s][0] + delta[0], bs->desc.f_code[s][0]);
+      mb->PMV[1][s][1] = wrap(DIV2DOWN(mb->PMV[1][s][1]) + delta[1], bs->desc.f_code[s][1]) * 2;
+
+   } else {
+      motion_vector(bs, 0, s, dmv, delta, dmvector);
+      mb->PMV[0][s][0] = wrap(mb->PMV[0][s][0] + delta[0], bs->desc.f_code[s][0]);
+      mb->PMV[0][s][1] = wrap(mb->PMV[0][s][1] + delta[1], bs->desc.f_code[s][1]);
+   }
+}
+
+static INLINE void
+motion_vector_field(struct vl_mpg12_bs *bs, int s, struct pipe_mpeg12_macroblock *mb)
+{
+   int dmv = mb->macroblock_modes.bits.field_motion_type == PIPE_MPEG12_MO_TYPE_DUAL_PRIME;
+   short dmvector[2], delta[2];
+
+   if (mb->macroblock_modes.bits.field_motion_type == PIPE_MPEG12_MO_TYPE_16x8) {
+      mb->motion_vertical_field_select |= vl_vlc_get_uimsbf(&bs->vlc, 1) << s;
+      motion_vector(bs, 0, s, dmv, delta, dmvector);
+
+      mb->motion_vertical_field_select |= vl_vlc_get_uimsbf(&bs->vlc, 1) << (s + 2);
+      motion_vector(bs, 1, s, dmv, delta, dmvector);
+   } else {
+      if (!dmv)
+         mb->motion_vertical_field_select |= vl_vlc_get_uimsbf(&bs->vlc, 1) << s;
+      motion_vector(bs, 0, s, dmv, delta, dmvector);
+   }
+}
+
+static INLINE void
+reset_predictor(struct vl_mpg12_bs *bs) {
+   bs->pred_dc[0] = bs->pred_dc[1] = bs->pred_dc[2] = 0;
+}
+
+static INLINE void
+decode_dct(struct vl_mpg12_bs *bs, struct pipe_mpeg12_macroblock *mb, int scale)
+{
+   static const unsigned blk2cc[] = { 0, 0, 0, 0, 1, 2 };
+   static const struct vl_vlc_entry *blk2dcsize[] = {
+      tbl_B12, tbl_B12, tbl_B12, tbl_B12, tbl_B13, tbl_B13
+   };
+
+   bool intra = mb->macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA;
+   const struct dct_coeff *table = intra ? bs->intra_dct_tbl : tbl_B14_AC;
+   const struct dct_coeff *entry;
+   int i, cbp, blk = 0;
+   short *dst = mb->blocks;
+
+   vl_vlc_fillbits(&bs->vlc);
+   mb->coded_block_pattern = cbp = intra ? 0x3F : vl_vlc_get_vlclbf(&bs->vlc, tbl_B9, 9);
+
+   goto entry;
+
+   while(1) {
+      vl_vlc_eatbits(&bs->vlc, entry->length);
+      if (entry->run == dct_End_of_Block) {
+
+         dst += 64;
+         cbp <<= 1;
+         cbp &= 0x3F;
+         blk++;
+
+entry:
+         if (!cbp)
+            break;
+
+         while(!(cbp & 0x20)) {
+            cbp <<= 1;
+            blk++;
+         }
+
+         vl_vlc_fillbits(&bs->vlc);
+
+         if (intra) {
+            unsigned cc = blk2cc[blk];
+            unsigned size = vl_vlc_get_vlclbf(&bs->vlc, blk2dcsize[blk], 10);
+
+            if (size) {
+               int dct_diff = vl_vlc_get_uimsbf(&bs->vlc, size);
+               int half_range = 1 << (size - 1);
+               if (dct_diff < half_range)
+                  dct_diff = (dct_diff + 1) - (2 * half_range);
+               bs->pred_dc[cc] += dct_diff;
+            }
+
+            dst[0] = bs->pred_dc[cc];
+            i = 0;
+
+         } else {
+            entry = tbl_B14_DC + vl_vlc_peekbits(&bs->vlc, 17);
+            i = -1;
+            continue;
+         }
+
+      } else if (entry->run == dct_Escape) {
+         i += vl_vlc_get_uimsbf(&bs->vlc, 6) + 1;
+         if (i > 64)
+            break;
+
+         dst[i] = vl_vlc_get_simsbf(&bs->vlc, 12) * scale;
+
+      } else {
+         i += entry->run;
+         if (i > 64)
+            break;
+
+         dst[i] = entry->level * scale;
+      }
+
+      vl_vlc_fillbits(&bs->vlc);
+      entry = table + vl_vlc_peekbits(&bs->vlc, 17);
+   }
+}
+
+static INLINE bool
+decode_slice(struct vl_mpg12_bs *bs)
+{
+   struct pipe_mpeg12_macroblock mb = {};
+   short dct_blocks[64*6];
+   unsigned dct_scale;
+   signed x = -1;
+
+   mb.base.codec = PIPE_VIDEO_CODEC_MPEG12;
+   mb.y = vl_vlc_get_uimsbf(&bs->vlc, 8) - 1;
+   mb.blocks = dct_blocks;
+
+   reset_predictor(bs);
+   dct_scale = quant_scale[bs->desc.q_scale_type][vl_vlc_get_uimsbf(&bs->vlc, 5)];
+
+   if (vl_vlc_get_uimsbf(&bs->vlc, 1))
+      while (vl_vlc_get_uimsbf(&bs->vlc, 9) & 1)
+         vl_vlc_fillbits(&bs->vlc);
+
+   do {
+      int inc = 0;
+
+      vl_vlc_fillbits(&bs->vlc);
+
+      while (vl_vlc_peekbits(&bs->vlc, 11) == 15) {
+         vl_vlc_eatbits(&bs->vlc, 11);
+         vl_vlc_fillbits(&bs->vlc);
+      }
+
+      while (vl_vlc_peekbits(&bs->vlc, 11) == 8) {
+         vl_vlc_eatbits(&bs->vlc, 11);
+         vl_vlc_fillbits(&bs->vlc);
+         inc += 33;
+      }
+      inc += vl_vlc_get_vlclbf(&bs->vlc, tbl_B1, 11);
+      if (x != -1) {
+         mb.num_skipped_macroblocks = inc - 1;
+         bs->decoder->decode_macroblock(bs->decoder, &mb.base, 1);
+      }
+      mb.x = x += inc;
+
+      switch (bs->desc.picture_coding_type) {
+      case PIPE_MPEG12_PICTURE_CODING_TYPE_I:
+         mb.macroblock_type = vl_vlc_get_vlclbf(&bs->vlc, tbl_B2, 2);
+         break;
+
+      case PIPE_MPEG12_PICTURE_CODING_TYPE_P:
+         mb.macroblock_type = vl_vlc_get_vlclbf(&bs->vlc, tbl_B3, 6);
+         break;
+
+      case PIPE_MPEG12_PICTURE_CODING_TYPE_B:
+         mb.macroblock_type = vl_vlc_get_vlclbf(&bs->vlc, tbl_B4, 6);
+         break;
+
+      default:
+         mb.macroblock_type = 0;
+         /* dumb gcc */
+         assert(0);
+      }
+
+      mb.macroblock_modes.value = 0;
+      if (mb.macroblock_type & (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD | PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD)) {
+         if (bs->desc.picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FRAME) {
+            if (bs->desc.frame_pred_frame_dct == 0)
+               mb.macroblock_modes.bits.frame_motion_type = vl_vlc_get_uimsbf(&bs->vlc, 2);
+            else
+               mb.macroblock_modes.bits.frame_motion_type = 2;
+         } else
+            mb.macroblock_modes.bits.field_motion_type = vl_vlc_get_uimsbf(&bs->vlc, 2);
+
+      } else if ((mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA) && bs->desc.concealment_motion_vectors) {
+         if (bs->desc.picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FRAME)
+            mb.macroblock_modes.bits.frame_motion_type = 2;
+         else
+            mb.macroblock_modes.bits.field_motion_type = 1;
+      }
+
+      if (bs->desc.picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FRAME &&
+          bs->desc.frame_pred_frame_dct == 0 &&
+          mb.macroblock_type & (PIPE_MPEG12_MB_TYPE_INTRA | PIPE_MPEG12_MB_TYPE_PATTERN))
+         mb.macroblock_modes.bits.dct_type = vl_vlc_get_uimsbf(&bs->vlc, 1);
+
+      if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_QUANT)
+         dct_scale = quant_scale[bs->desc.q_scale_type][vl_vlc_get_uimsbf(&bs->vlc, 5)];
+
+      if (inc > 1 && bs->desc.picture_coding_type == PIPE_MPEG12_PICTURE_CODING_TYPE_P)
+         memset(mb.PMV, 0, sizeof(mb.PMV));
+
+      mb.motion_vertical_field_select = 0;
+      if ((mb.macroblock_type & PIPE_MPEG12_MB_TYPE_MOTION_FORWARD) ||
+          (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA && bs->desc.concealment_motion_vectors)) {
+         if (bs->desc.picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FRAME)
+            motion_vector_frame(bs, 0, &mb);
+         else
+            motion_vector_field(bs, 0, &mb);
+      }
+
+      if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD) {
+         if (bs->desc.picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FRAME)
+            motion_vector_frame(bs, 1, &mb);
+         else
+            motion_vector_field(bs, 1, &mb);
+      }
+
+      if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA && bs->desc.concealment_motion_vectors) {
+         unsigned extra = vl_vlc_get_uimsbf(&bs->vlc, 1);
+         mb.PMV[1][0][0] = mb.PMV[0][0][0];
+         mb.PMV[1][0][1] = mb.PMV[0][0][1];
+         assert(extra);
+      } else if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA ||
+                !(mb.macroblock_type & (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD |
+                                        PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD))) {
+         memset(mb.PMV, 0, sizeof(mb.PMV));
+      }
+
+      if ((mb.macroblock_type & PIPE_MPEG12_MB_TYPE_MOTION_FORWARD &&
+           mb.macroblock_modes.bits.frame_motion_type == 2) ||
+          (mb.macroblock_modes.bits.frame_motion_type == 3)) {
+            mb.PMV[1][0][0] = mb.PMV[0][0][0];
+            mb.PMV[1][0][1] = mb.PMV[0][0][1];
+      }
+
+      if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD &&
+          mb.macroblock_modes.bits.frame_motion_type == 2) {
+            mb.PMV[1][1][0] = mb.PMV[0][1][0];
+            mb.PMV[1][1][1] = mb.PMV[0][1][1];
+      }
+
+      if (inc > 1 || !(mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA))
+         reset_predictor(bs);
+
+      if (mb.macroblock_type & (PIPE_MPEG12_MB_TYPE_INTRA | PIPE_MPEG12_MB_TYPE_PATTERN)) {
+         memset(dct_blocks, 0, sizeof(dct_blocks));
+         decode_dct(bs, &mb, dct_scale);
+      } else
+         mb.coded_block_pattern = 0;
+
+   } while (vl_vlc_bytes_left(&bs->vlc) && vl_vlc_peekbits(&bs->vlc, 23));
+
+   mb.num_skipped_macroblocks = 0;
+   bs->decoder->decode_macroblock(bs->decoder, &mb.base, 1);
+   return true;
+}
+
+void
+vl_mpg12_bs_init(struct vl_mpg12_bs *bs, struct pipe_video_decoder *decoder)
+{
+   static bool tables_initialized = false;
+
    assert(bs);
 
    memset(bs, 0, sizeof(struct vl_mpg12_bs));
 
-   bs->width = width;
-   bs->height = height;
-}
+   bs->decoder = decoder;
 
-void
-vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct vl_ycbcr_block *ycbcr_stream[VL_MAX_PLANES],
-                        short *ycbcr_buffer, struct vl_motionvector *mv_stream[VL_MAX_REF_FRAMES])
-{
-   unsigned i;
-
-   assert(bs);
-   assert(ycbcr_stream && ycbcr_buffer);
-   assert(mv_stream);
-
-   bs->block_num = 0;
-
-   for (i = 0; i < VL_MAX_PLANES; ++i)
-      bs->ycbcr_stream[i] = ycbcr_stream[i];
-   bs->ycbcr_buffer = ycbcr_buffer;
-
-   for (i = 0; i < VL_MAX_REF_FRAMES; ++i)
-      bs->mv_stream[i] = mv_stream[i];
-
-   // TODO
-   for (i = 0; i < bs->width*bs->height; ++i) {
-      bs->mv_stream[0][i].top.x = bs->mv_stream[0][i].top.y = 0;
-      bs->mv_stream[0][i].top.field_select = PIPE_VIDEO_FRAME;
-      bs->mv_stream[0][i].top.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
-      bs->mv_stream[0][i].bottom.x = bs->mv_stream[0][i].bottom.y = 0;
-      bs->mv_stream[0][i].bottom.field_select = PIPE_VIDEO_FRAME;
-      bs->mv_stream[0][i].bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
-
-      bs->mv_stream[1][i].top.x = bs->mv_stream[1][i].top.y = 0;
-      bs->mv_stream[1][i].top.field_select = PIPE_VIDEO_FRAME;
-      bs->mv_stream[1][i].top.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
-      bs->mv_stream[1][i].bottom.x = bs->mv_stream[1][i].bottom.y = 0;
-      bs->mv_stream[1][i].bottom.field_select = PIPE_VIDEO_FRAME;
-      bs->mv_stream[1][i].bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
+   if (!tables_initialized) {
+      init_tables();
+      tables_initialized = true;
    }
 }
 
 void
-vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer,
-                   struct pipe_mpeg12_picture_desc *picture, unsigned num_ycbcr_blocks[3])
+vl_mpg12_bs_set_picture_desc(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
+{
+   bs->desc = *picture;
+   bs->intra_dct_tbl = picture->intra_vlc_format ? tbl_B15 : tbl_B14_AC;
+}
+
+void
+vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const uint8_t *buffer)
 {
    assert(bs);
-   assert(num_ycbcr_blocks);
    assert(buffer && num_bytes);
 
-   bs->num_ycbcr_blocks = num_ycbcr_blocks;
+   while(num_bytes > 2) {
+      if (buffer[0] == 0x00 && buffer[1] == 0x00 && buffer[2] == 0x01 &&
+	buffer[3] >= 0x01 && buffer[3] < 0xAF) {
+         unsigned consumed;
 
-   vl_vlc_init(&bs->vlc, buffer, num_bytes);
+         buffer += 3;
+         num_bytes -= 3;
 
-   while(decode_slice(bs, picture));
+         vl_vlc_init(&bs->vlc, buffer, num_bytes);
+
+         if (!decode_slice(bs))
+            return;
+
+         /* it's possible for the vlc to consume up to eight extra bytes */
+         consumed = num_bytes - vl_vlc_bytes_left(&bs->vlc);
+         consumed = consumed > 8 ? consumed - 8 : 0;
+
+         /* crap, this is a bug we have consumed more bytes than left in the buffer */
+         assert(consumed <= num_bytes);
+
+         num_bytes -= consumed;
+         buffer += consumed;
+
+      } else {
+         ++buffer;
+         --num_bytes;
+      }
+   }
 }
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h
index 797a7e792a8..c3f14a17932 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h
@@ -30,32 +30,25 @@
 
 #include "vl_defines.h"
 #include "vl_vlc.h"
-#include "vl_vertex_buffers.h"
 
 struct vl_mpg12_bs
 {
-   unsigned width, height;
+   struct pipe_video_decoder *decoder;
+
+   struct pipe_mpeg12_picture_desc desc;
+   struct dct_coeff *intra_dct_tbl;
 
    struct vl_vlc vlc;
-
-   unsigned block_num;
-   unsigned *num_ycbcr_blocks;
-
-   struct vl_ycbcr_block *ycbcr_stream[VL_MAX_PLANES];
-   short *ycbcr_buffer;
-
-   struct vl_motionvector *mv_stream[VL_MAX_REF_FRAMES];
+   short pred_dc[3];
 };
 
 void
-vl_mpg12_bs_init(struct vl_mpg12_bs *bs, unsigned width, unsigned height);
+vl_mpg12_bs_init(struct vl_mpg12_bs *bs, struct pipe_video_decoder *decoder);
 
 void
-vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct vl_ycbcr_block *ycbcr_stream[VL_MAX_PLANES],
-                        short *ycbcr_buffer, struct vl_motionvector *mv_stream[VL_MAX_REF_FRAMES]);
+vl_mpg12_bs_set_picture_desc(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture);
 
 void
-vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer,
-                   struct pipe_mpeg12_picture_desc *picture, unsigned num_ycbcr_blocks[3]);
+vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const uint8_t *buffer);
 
 #endif /* vl_mpeg12_bitstream_h */
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index a27066765eb..e0b477d315a 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -450,9 +450,7 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
       goto error_zscan;
 
    if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
-      vl_mpg12_bs_init(&buffer->bs,
-                       dec->base.width / MACROBLOCK_WIDTH,
-                       dec->base.height / MACROBLOCK_HEIGHT);
+      vl_mpg12_bs_init(&buffer->bs, decoder);
 
    return buffer;
 
@@ -614,7 +612,7 @@ vl_mpeg12_begin_frame(struct pipe_video_decoder *decoder)
       buf->mv_stream[i] = vl_vb_get_mv_stream(&buf->vertex_stream, i);
 
    if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
-      vl_mpg12_bs_set_buffers(&buf->bs, buf->ycbcr_stream, buf->texels, buf->mv_stream);
+      vl_mpg12_bs_set_picture_desc(&buf->bs, &dec->picture_desc);
 
    } else {
 
@@ -708,7 +706,7 @@ vl_mpeg12_decode_bitstream(struct pipe_video_decoder *decoder,
       vl_zscan_set_layout(&buf->zscan[i], dec->picture_desc.alternate_scan ?
                           dec->zscan_alternate : dec->zscan_normal);
 
-   vl_mpg12_bs_decode(&buf->bs, num_bytes, data, &dec->picture_desc, buf->num_ycbcr_blocks);
+   vl_mpg12_bs_decode(&buf->bs, num_bytes, data);
 }
 
 static void
diff --git a/src/gallium/auxiliary/vl/vl_vlc.h b/src/gallium/auxiliary/vl/vl_vlc.h
index e81b1e9afd2..17a7b650c09 100644
--- a/src/gallium/auxiliary/vl/vl_vlc.h
+++ b/src/gallium/auxiliary/vl/vl_vlc.h
@@ -25,116 +25,147 @@
  *
  **************************************************************************/
 
-/**
- * This file is based uppon slice_xvmc.c and vlc.h from the xine project,
- * which in turn is based on mpeg2dec. The following is the original copyright:
- *
- * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
- * See http://libmpeg2.sourceforge.net/ for updates.
- *
- * mpeg2dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * mpeg2dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
 #ifndef vl_vlc_h
 #define vl_vlc_h
 
-#include "pipe/p_compiler.h"
+#include <assert.h>
+
+#include <pipe/p_compiler.h>
+
+#include <util/u_math.h>
 
 struct vl_vlc
 {
-   uint32_t buf; /* current 32 bit working set of buffer */
-   int bits;     /* used bits in working set */
-   const uint8_t *ptr; /* buffer with stream data */
-   const uint8_t *max; /* ptr+len of buffer */
+   uint64_t buffer;
+   unsigned valid_bits;
+   uint32_t *data;
+   uint32_t *end;
+};
+
+struct vl_vlc_entry
+{
+   int8_t length;
+   int8_t value;
+};
+
+struct vl_vlc_compressed
+{
+   uint16_t bitcode;
+   struct vl_vlc_entry entry;
 };
 
 static INLINE void
-vl_vlc_restart(struct vl_vlc *vlc)
+vl_vlc_init_table(struct vl_vlc_entry *dst, unsigned dst_size, const struct vl_vlc_compressed *src, unsigned src_size)
 {
-   vlc->buf = (vlc->ptr[0] << 24) | (vlc->ptr[1] << 16) | (vlc->ptr[2] << 8) | vlc->ptr[3];
-   vlc->bits = -16;
-   vlc->ptr += 4;
+   unsigned i, bits = util_logbase2(dst_size);
+
+   for (i=0;i<dst_size;++i) {
+      dst[i].length = 0;
+      dst[i].value = 0;
+   }
+
+   for(; src_size > 0; --src_size, ++src) {
+      for(i=0; i<(1 << (bits - src->entry.length)); ++i)
+         dst[src->bitcode >> (16 - bits) | i] = src->entry;
+   }
+}
+
+static INLINE void
+vl_vlc_fillbits(struct vl_vlc *vlc)
+{
+   if (vlc->valid_bits < 32) {
+      uint32_t value = *vlc->data;
+
+      //assert(vlc->data <= vlc->end);
+
+#ifndef PIPE_ARCH_BIG_ENDIAN
+      value = util_bswap32(value);
+#endif
+
+      vlc->buffer |= (uint64_t)value << (32 - vlc->valid_bits);
+      ++vlc->data;
+      vlc->valid_bits += 32;
+   }
 }
 
 static INLINE void
 vl_vlc_init(struct vl_vlc *vlc, const uint8_t *data, unsigned len)
 {
-   vlc->ptr = data;
-   vlc->max = data + len;
-   vl_vlc_restart(vlc);
+   assert(vlc);
+   assert(data && len);
+
+   vlc->buffer = 0;
+   vlc->valid_bits = 0;
+
+   /* align the data pointer */
+   while((uint64_t)data & 3) {
+      vlc->buffer |= (uint64_t)*data << (56 - vlc->valid_bits);
+      ++data;
+      --len;
+      vlc->valid_bits += 8;
+   }
+   vlc->data = (uint32_t*)data;
+   vlc->end = (uint32_t*)(data + len);
+
+   vl_vlc_fillbits(vlc);
+   vl_vlc_fillbits(vlc);
 }
 
-static INLINE bool
-vl_vlc_getbyte(struct vl_vlc *vlc)
+static INLINE unsigned
+vl_vlc_bytes_left(struct vl_vlc *vlc)
 {
-   vlc->buf <<= 8;
-   vlc->buf |= vlc->ptr[0];
-   vlc->ptr++;
-   return vlc->ptr < vlc->max;
+   return ((uint8_t*)vlc->end)-((uint8_t*)vlc->data);
 }
 
-#define vl_vlc_getword(vlc, shift)                                      \
-do {                                                                    \
-   (vlc)->buf |= (((vlc)->ptr[0] << 8) | (vlc)->ptr[1]) << (shift);     \
-   (vlc)->ptr += 2;                                                     \
-} while (0)
+static INLINE unsigned
+vl_vlc_peekbits(struct vl_vlc *vlc, unsigned num_bits)
+{
+   //assert(vlc->valid_bits >= num_bits);
 
-/* make sure that there are at least 16 valid bits in bit_buf */
-#define vl_vlc_needbits(vlc)                    \
-do {                                            \
-    if ((vlc)->bits >= 0) {                      \
-	vl_vlc_getword(vlc, (vlc)->bits);       \
-	(vlc)->bits -= 16;                      \
-    }                                           \
-} while (0)
+   return vlc->buffer >> (64 - num_bits);
+}
 
-/* make sure that the full 32 bit of the buffer are valid */
 static INLINE void
-vl_vlc_need32bits(struct vl_vlc *vlc)
+vl_vlc_eatbits(struct vl_vlc *vlc, unsigned num_bits)
 {
-   vl_vlc_needbits(vlc);
-   if (vlc->bits > -8) {
-      unsigned n = -vlc->bits;
-      vlc->buf <<= n;
-      vlc->buf |= *vlc->ptr << 8;
-      vlc->bits = -8;
-      vlc->ptr++;
-   }
-   if (vlc->bits > -16) {
-      unsigned n = -vlc->bits - 8;
-      vlc->buf <<= n;
-      vlc->buf |= *vlc->ptr;
-      vlc->bits = -16;
-      vlc->ptr++;
-   }
+   //assert(vlc->valid_bits > num_bits);
+
+   vlc->buffer <<= num_bits;
+   vlc->valid_bits -= num_bits;
 }
 
-/* remove num valid bits from bit_buf */
-#define vl_vlc_dumpbits(vlc, num)       \
-do {					\
-    (vlc)->buf <<= (num);		\
-    (vlc)->bits += (num);		\
-} while (0)
+static INLINE unsigned
+vl_vlc_get_uimsbf(struct vl_vlc *vlc, unsigned num_bits)
+{
+   unsigned value;
 
-/* take num bits from the high part of bit_buf and zero extend them */
-#define vl_vlc_ubits(vlc, num) (((uint32_t)((vlc)->buf)) >> (32 - (num)))
+   //assert(vlc->valid_bits >= num_bits);
 
-/* take num bits from the high part of bit_buf and sign extend them */
-#define vl_vlc_sbits(vlc, num) (((int32_t)((vlc)->buf)) >> (32 - (num)))
+   value = vlc->buffer >> (64 - num_bits);
+   vl_vlc_eatbits(vlc, num_bits);
+
+   return value;
+}
+
+static INLINE signed
+vl_vlc_get_simsbf(struct vl_vlc *vlc, unsigned num_bits)
+{
+   signed value;
+
+   //assert(vlc->valid_bits >= num_bits);
+
+   value = ((int64_t)vlc->buffer) >> (64 - num_bits);
+   vl_vlc_eatbits(vlc, num_bits);
+
+   return value;
+}
+
+static INLINE int8_t
+vl_vlc_get_vlclbf(struct vl_vlc *vlc, const struct vl_vlc_entry *tbl, unsigned num_bits)
+{
+   tbl += vl_vlc_peekbits(vlc, num_bits);
+   vl_vlc_eatbits(vlc, tbl->length);
+   return tbl->value;
+}
 
 #endif /* vl_vlc_h */
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index 8166ac76b63..f655ed411f4 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -43,6 +43,17 @@ struct pipe_video_rect
    unsigned x, y, w, h;
 };
 
+/*
+ * see table 6-12 in the spec
+ */
+enum pipe_mpeg12_picture_coding_type
+{
+   PIPE_MPEG12_PICTURE_CODING_TYPE_I = 0x01,
+   PIPE_MPEG12_PICTURE_CODING_TYPE_P = 0x02,
+   PIPE_MPEG12_PICTURE_CODING_TYPE_B = 0x03,
+   PIPE_MPEG12_PICTURE_CODING_TYPE_D = 0x04
+};
+
 /*
  * see table 6-14 in the spec
  */

From 3d1af78fdc4319b04f9cf324106f127b498b962d Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 25 Aug 2011 15:14:37 -0600
Subject: [PATCH 588/600] scons: add more LIBS for compiling with LLVM 2.9 on
 Windows

These extra libs shouldn't hurt with LLVM 2.8 or older.
---
 scons/llvm.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scons/llvm.py b/scons/llvm.py
index c8d2d372e4a..57fe922d0ae 100644
--- a/scons/llvm.py
+++ b/scons/llvm.py
@@ -135,6 +135,8 @@ def generate(env):
         env.Append(LIBS = [
             'imagehlp',
             'psapi',
+            'shell32',
+            'advapi32'
         ])
         if env['msvc']:
             # Some of the LLVM C headers use the inline keyword without

From cdb7396390b0afffc0d33c9aba2898012802e6d0 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 25 Aug 2011 16:50:56 -0600
Subject: [PATCH 589/600] scons: don't compile some files with -gstabs if using
 mingw32

Compiling some (large) files with i686-pc-mingw32-gcc 4.2.2 (at least)
and the -gstabs option triggers a compiler error.  Use this work-around
to simply compile the effected files without -gstabs.
---
 scons/crossmingw.py              | 38 ++++++++++++++++++++++++++++++++
 src/gallium/auxiliary/SConscript |  4 ++++
 src/mapi/glapi/SConscript        |  5 +++++
 3 files changed, 47 insertions(+)

diff --git a/scons/crossmingw.py b/scons/crossmingw.py
index cc046229e2c..4a695a440d6 100644
--- a/scons/crossmingw.py
+++ b/scons/crossmingw.py
@@ -128,6 +128,42 @@ res_builder = SCons.Builder.Builder(action=res_action, suffix='.o',
                                     source_scanner=SCons.Tool.SourceFileScanner)
 SCons.Tool.SourceFileScanner.add_scanner('.rc', SCons.Defaults.CScan)
 
+
+
+def compile_without_gstabs(env, sources, c_file):
+    '''This is a hack used to compile some source files without the
+    -gstabs option.
+
+    It seems that some versions of mingw32's gcc (4.4.2 at least) die
+    when compiling large files with the -gstabs option.  -gstabs is
+    related to debug symbols and can be omitted from the effected
+    files.
+
+    This function compiles the given c_file without -gstabs, removes
+    the c_file from the sources list, then appends the new .o file to
+    sources.  Then return the new sources list.
+    '''
+
+    # Modify CCFLAGS to not have -gstabs option:
+    env2 = env.Clone()
+    flags = str(env2['CCFLAGS'])
+    flags = flags.replace("-gstabs", "")
+    env2['CCFLAGS'] = SCons.Util.CLVar(flags)
+    
+    # Build the special-case files:
+    obj_file = env2.SharedObject(c_file)
+
+    # Replace ".cpp" or ".c" with ".o"
+    o_file = c_file.replace(".cpp", ".o")
+    o_file = o_file.replace(".c", ".o")
+
+    # Replace the .c files with the specially-compiled .o file
+    sources.remove(c_file)
+    sources.append(o_file)
+
+    return sources
+
+
 def generate(env):
     mingw_prefix = find(env)
 
@@ -197,5 +233,7 @@ def generate(env):
     # Avoid depending on gcc runtime DLLs
     env.AppendUnique(LINKFLAGS = ['-static-libgcc'])
 
+    env.AddMethod(compile_without_gstabs, 'compile_without_gstabs')
+
 def exists(env):
     return find(env)
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index e00040d97ab..07c420e138d 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -58,6 +58,10 @@ if env['llvm']:
         'GALLIVM_CPP_SOURCES'
     ])
 
+    if env['toolchain'] == 'crossmingw':
+        # compile lp_bld_misc.cpp without -gstabs option
+        source = env.compile_without_gstabs(source, "gallivm/lp_bld_misc.cpp")
+
 gallium = env.ConvenienceLibrary(
     target = 'gallium',
     source = source,
diff --git a/src/mapi/glapi/SConscript b/src/mapi/glapi/SConscript
index a7764745eda..fdd65790013 100644
--- a/src/mapi/glapi/SConscript
+++ b/src/mapi/glapi/SConscript
@@ -74,6 +74,11 @@ if env['platform'] != 'winddk':
         else:
             pass
     
+    if env['toolchain'] == 'crossmingw':
+        # compile these files without -gstabs option
+        glapi_sources = env.compile_without_gstabs(glapi_sources, "glapi_dispatch.c")
+        glapi_sources = env.compile_without_gstabs(glapi_sources, "glapi_getproc.c")
+
     glapi = env.ConvenienceLibrary(
         target = 'glapi',
         source = glapi_sources,

From b59715b13afa6885fe7950677df3be2fd89dee47 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 26 Aug 2011 08:10:24 -0600
Subject: [PATCH 590/600] g3dvl: fix compilation failure on MSVC

I assume the intention of "mb = {}" was to zero-initialize it.
---
 src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index ef00e2d9466..db05b151f95 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -789,11 +789,12 @@ entry:
 static INLINE bool
 decode_slice(struct vl_mpg12_bs *bs)
 {
-   struct pipe_mpeg12_macroblock mb = {};
+   struct pipe_mpeg12_macroblock mb;
    short dct_blocks[64*6];
    unsigned dct_scale;
    signed x = -1;
 
+   memset(&mb, 0, sizeof(mb));
    mb.base.codec = PIPE_VIDEO_CODEC_MPEG12;
    mb.y = vl_vlc_get_uimsbf(&bs->vlc, 8) - 1;
    mb.blocks = dct_blocks;

From 005aea891ec5814c3c05f20e5a65ca61db3b1b10 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 26 Aug 2011 08:24:01 -0600
Subject: [PATCH 591/600] g3dvl: s/inline/INLINE/ to fix MSVC build

---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index e0b477d315a..7d53168afe5 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -238,7 +238,7 @@ cleanup_mc_buffer(struct vl_mpeg12_buffer *buf)
       vl_mc_cleanup_buffer(&buf->mc[i]);
 }
 
-static inline void
+static INLINE void
 MacroBlockTypeToPipeWeights(const struct pipe_mpeg12_macroblock *mb, unsigned weights[2])
 {
    assert(mb);
@@ -272,7 +272,7 @@ MacroBlockTypeToPipeWeights(const struct pipe_mpeg12_macroblock *mb, unsigned we
    }
 }
 
-static inline struct vl_motionvector
+static INLINE struct vl_motionvector
 MotionVectorToPipe(const struct pipe_mpeg12_macroblock *mb, unsigned vector,
                    unsigned field_select_mask, unsigned weight)
 {
@@ -323,7 +323,7 @@ MotionVectorToPipe(const struct pipe_mpeg12_macroblock *mb, unsigned vector,
    return mv;
 }
 
-static inline void
+static INLINE void
 UploadYcbcrBlocks(struct vl_mpeg12_decoder *dec,
                   struct vl_mpeg12_buffer *buf,
                   const struct pipe_mpeg12_macroblock *mb)

From 751f0ce7736de455f3f6d8dcfbaf25bbbd3fcf55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
Date: Mon, 22 Aug 2011 15:44:42 +0200
Subject: [PATCH 592/600] st/xorg: Only damage non-front source in DRI2
 CopyRegion hook.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Based on a vmwgfx xa/saa fix.

Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/state_trackers/xorg/xorg_dri2.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c
index 6f2c52eabb6..3350ac736cf 100644
--- a/src/gallium/state_trackers/xorg/xorg_dri2.c
+++ b/src/gallium/state_trackers/xorg/xorg_dri2.c
@@ -372,13 +372,15 @@ dri2_copy_region(DrawablePtr pDraw, RegionPtr pRegion,
     save_accel = ms->exa->accel;
     ms->exa->accel = TRUE;
 
-    /* In case it won't be though, make sure the GPU copy contents of the
-     * source pixmap will be used for the software fallback - presumably the
-     * client modified them before calling in here.
-     */
-    exaMoveInPixmap(src_priv->pPixmap);
-    DamageRegionAppend(src_draw, pRegion);
-    DamageRegionProcessPending(src_draw);
+    if (pSrcBuffer->attachment != DRI2BufferFrontLeft) {
+	/* In case it won't be though, make sure the GPU copy contents of the
+	 * source pixmap will be used for the software fallback - presumably the
+	 * client modified them before calling in here.
+	 */
+	exaMoveInPixmap(src_priv->pPixmap);
+	DamageRegionAppend(src_draw, pRegion);
+	DamageRegionProcessPending(src_draw);
+    }
 
    if (cust && cust->winsys_context_throttle)
        cust->winsys_context_throttle(cust, ms->ctx, THROTTLE_SWAP);

From f5a4e04cdb37069bc5b92798137588fed6eda39b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
Date: Mon, 22 Aug 2011 15:44:42 +0200
Subject: [PATCH 593/600] st/xorg: Disable dirty throttling by default.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/state_trackers/xorg/xorg_driver.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/xorg/xorg_driver.c b/src/gallium/state_trackers/xorg/xorg_driver.c
index 063ae92f6be..0ade319cdc3 100644
--- a/src/gallium/state_trackers/xorg/xorg_driver.c
+++ b/src/gallium/state_trackers/xorg/xorg_driver.c
@@ -817,7 +817,7 @@ drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv)
 				&ms->swapThrottling) ?
 	X_CONFIG : X_DEFAULT;
 
-    ms->dirtyThrottling = cust ?  cust->dirty_throttling : TRUE;
+    ms->dirtyThrottling = cust ?  cust->dirty_throttling : FALSE;
     from_dt = xf86GetOptValBool(ms->Options, OPTION_THROTTLE_DIRTY,
 				&ms->dirtyThrottling) ?
 	X_CONFIG : X_DEFAULT;

From 702838a7061cde91a6bcdd3382817deb61218bf1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
Date: Mon, 22 Aug 2011 15:44:43 +0200
Subject: [PATCH 594/600] r600g: Handle PIPE_TRANSFER_MAP_DIRECTLY.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If the state tracker tries to map the resource directly but we can't or don't
want to do that, fail to create a transfer.

Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/r600/r600_texture.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index e41fe11ac97..7c1bd9d8ec6 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -647,6 +647,9 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
 		(texture->flags & R600_RESOURCE_FLAG_TRANSFER))
 		use_staging_texture = FALSE;
 
+	if (use_staging_texture && (usage & PIPE_TRANSFER_MAP_DIRECTLY))
+		return NULL;
+
 	trans = CALLOC_STRUCT(r600_transfer);
 	if (trans == NULL)
 		return NULL;

From 433c740c1a2c173445c0bb161522bf7a76fa49c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
Date: Mon, 22 Aug 2011 15:44:42 +0200
Subject: [PATCH 595/600] r600g: Hook up xorg state tracker.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mostly copied from r300g.

Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 configure.ac                           |   2 +-
 src/gallium/targets/xorg-r600/Makefile |  25 +++++
 src/gallium/targets/xorg-r600/target.c |  26 +++++
 src/gallium/targets/xorg-r600/xorg.c   | 148 +++++++++++++++++++++++++
 4 files changed, 200 insertions(+), 1 deletion(-)
 create mode 100644 src/gallium/targets/xorg-r600/Makefile
 create mode 100644 src/gallium/targets/xorg-r600/target.c
 create mode 100644 src/gallium/targets/xorg-r600/xorg.c

diff --git a/configure.ac b/configure.ac
index ea58dae6593..c461f43713a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1855,7 +1855,7 @@ if test "x$with_gallium_drivers" != x; then
             ;;
         xr600)
             GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r600"
-            gallium_check_st "r600/drm radeon/drm" "dri-r600" "" "" "xvmc-r600" "vdpau-r600" "va-r600"
+            gallium_check_st "r600/drm radeon/drm" "dri-r600" "xorg-r600" "" "xvmc-r600" "vdpau-r600" "va-r600"
             ;;
         xnouveau)
             PKG_CHECK_MODULES([NOUVEAU], [libdrm_nouveau >= $LIBDRM_NOUVEAU_REQUIRED])
diff --git a/src/gallium/targets/xorg-r600/Makefile b/src/gallium/targets/xorg-r600/Makefile
new file mode 100644
index 00000000000..4577ba605a5
--- /dev/null
+++ b/src/gallium/targets/xorg-r600/Makefile
@@ -0,0 +1,25 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = r600g_drv.so
+
+C_SOURCES = \
+	target.c \
+	xorg.c
+
+DRIVER_DEFINES = \
+	-DHAVE_CONFIG_H -DGALLIUM_RBUG -DGALLIUM_TRACE -DGALLIUM_GALAHAD
+
+DRIVER_PIPES = \
+	$(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \
+	$(TOP)/src/gallium/drivers/r600/libr600.a \
+	$(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
+	$(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
+	$(TOP)/src/gallium/drivers/galahad/libgalahad.a \
+	$(TOP)/src/gallium/drivers/trace/libtrace.a \
+	$(TOP)/src/gallium/drivers/rbug/librbug.a
+
+DRIVER_LINKS = \
+	$(shell pkg-config --libs libdrm)
+
+include ../Makefile.xorg
diff --git a/src/gallium/targets/xorg-r600/target.c b/src/gallium/targets/xorg-r600/target.c
new file mode 100644
index 00000000000..60424359a7b
--- /dev/null
+++ b/src/gallium/targets/xorg-r600/target.c
@@ -0,0 +1,26 @@
+
+#include "target-helpers/inline_debug_helper.h"
+#include "state_tracker/drm_driver.h"
+#include "radeon/drm/radeon_drm_public.h"
+#include "r600/r600_public.h"
+
+static struct pipe_screen *
+create_screen(int fd)
+{
+   struct radeon_winsys *sws;
+   struct pipe_screen *screen;
+
+   sws = radeon_drm_winsys_create(fd);
+   if (!sws)
+      return NULL;
+
+   screen = r600_screen_create(sws);
+   if (!screen)
+      return NULL;
+
+   screen = debug_screen_wrap(screen);
+
+   return screen;
+}
+
+DRM_DRIVER_DESCRIPTOR("r600", "radeon", create_screen)
diff --git a/src/gallium/targets/xorg-r600/xorg.c b/src/gallium/targets/xorg-r600/xorg.c
new file mode 100644
index 00000000000..120cf6da6fd
--- /dev/null
+++ b/src/gallium/targets/xorg-r600/xorg.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ * Author: Alan Hourihane <alanh@tungstengraphics.com>
+ * Author: Jakob Bornecrantz <wallbraker@gmail.com>
+ * Author: Corbin Simpson <MostAwesomedude@gmail.com>
+ *
+ */
+
+#include "../../state_trackers/xorg/xorg_winsys.h"
+
+static void r600_xorg_identify(int flags);
+static Bool r600_xorg_pci_probe(DriverPtr driver,
+				 int entity_num,
+				 struct pci_device *device,
+				 intptr_t match_data);
+
+static const struct pci_id_match r600_xorg_device_match[] = {
+    {0x1002, PCI_MATCH_ANY, PCI_MATCH_ANY, PCI_MATCH_ANY, 0, 0, 0},
+    {0, 0, 0},
+};
+
+static SymTabRec r600_xorg_chipsets[] = {
+    {PCI_MATCH_ANY, "AMD R6xx Graphics Chipset"},
+    {-1, NULL}
+};
+
+static PciChipsets r600_xorg_pci_devices[] = {
+    {PCI_MATCH_ANY, PCI_MATCH_ANY, NULL},
+    {-1, -1, NULL}
+};
+
+static XF86ModuleVersionInfo r600_xorg_version = {
+    "r600g",
+    MODULEVENDORSTRING,
+    MODINFOSTRING1,
+    MODINFOSTRING2,
+    XORG_VERSION_CURRENT,
+    0, 1, 0, /* major, minor, patch */
+    ABI_CLASS_VIDEODRV,
+    ABI_VIDEODRV_VERSION,
+    MOD_CLASS_VIDEODRV,
+    {0, 0, 0, 0}
+};
+
+/*
+ * Xorg driver exported structures
+ */
+
+_X_EXPORT DriverRec r600_driver = {
+    1,
+    "r600g",
+    r600_xorg_identify,
+    NULL,
+    xorg_tracker_available_options,
+    NULL,
+    0,
+    NULL,
+    r600_xorg_device_match,
+    r600_xorg_pci_probe
+};
+
+static MODULESETUPPROTO(r600_xorg_setup);
+
+_X_EXPORT XF86ModuleData r600gModuleData = {
+    &r600_xorg_version,
+    r600_xorg_setup,
+    NULL
+};
+
+/*
+ * Xorg driver functions
+ */
+
+static pointer
+r600_xorg_setup(pointer module, pointer opts, int *errmaj, int *errmin)
+{
+    static Bool setupDone = 0;
+
+    /* This module should be loaded only once, but check to be sure.
+     */
+    if (!setupDone) {
+	setupDone = 1;
+	xf86AddDriver(&r600_driver, module, HaveDriverFuncs);
+
+	/*
+	 * The return value must be non-NULL on success even though there
+	 * is no TearDownProc.
+	 */
+	return (pointer) 1;
+    } else {
+	if (errmaj)
+	    *errmaj = LDR_ONCEONLY;
+	return NULL;
+    }
+}
+
+static void
+r600_xorg_identify(int flags)
+{
+    xf86PrintChipsets("r600", "Driver for R6xx Gallium with KMS",
+		      r600_xorg_chipsets);
+}
+
+static Bool
+r600_xorg_pci_probe(DriverPtr driver,
+	  int entity_num, struct pci_device *device, intptr_t match_data)
+{
+    ScrnInfoPtr scrn = NULL;
+    EntityInfoPtr entity;
+
+    scrn = xf86ConfigPciEntity(scrn, 0, entity_num, r600_xorg_pci_devices,
+			       NULL, NULL, NULL, NULL, NULL);
+    if (scrn != NULL) {
+	scrn->driverVersion = 1;
+	scrn->driverName = "r600";
+	scrn->name = "R600G";
+	scrn->Probe = NULL;
+
+	entity = xf86GetEntityInfo(entity_num);
+
+	/* Use all the functions from the xorg tracker */
+	xorg_tracker_set_functions(scrn);
+    }
+    return scrn != NULL;
+}

From 3bcb9a858f482c21bc7c4d0fcd3571e25ea95090 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
Date: Tue, 23 Aug 2011 18:07:51 +0200
Subject: [PATCH 596/600] st/xorg: Fix solid fills for formats other than
 PICT_a8r8g8b8.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
---
 .../state_trackers/xorg/xorg_composite.c      | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/gallium/state_trackers/xorg/xorg_composite.c b/src/gallium/state_trackers/xorg/xorg_composite.c
index f696b72e1e3..61ba6bdddf7 100644
--- a/src/gallium/state_trackers/xorg/xorg_composite.c
+++ b/src/gallium/state_trackers/xorg/xorg_composite.c
@@ -4,6 +4,7 @@
 #include "xorg_exa_tgsi.h"
 
 #include "cso_cache/cso_context.h"
+#include "util/u_format.h"
 #include "util/u_sampler.h"
 
 
@@ -52,18 +53,17 @@ static const struct xorg_composite_blend xorg_blends[] = {
 
 
 static INLINE void
-pixel_to_float4(Pixel pixel, float *color)
+pixel_to_float4(Pixel pixel, float *color, enum pipe_format format)
 {
-   CARD32	    r, g, b, a;
+   const struct util_format_description *format_desc;
+   uint8_t packed[4];
 
-   a = (pixel >> 24) & 0xff;
-   r = (pixel >> 16) & 0xff;
-   g = (pixel >>  8) & 0xff;
-   b = (pixel >>  0) & 0xff;
-   color[0] = ((float)r) / 255.;
-   color[1] = ((float)g) / 255.;
-   color[2] = ((float)b) / 255.;
-   color[3] = ((float)a) / 255.;
+   format_desc = util_format_description(format);
+   packed[0] = pixel;
+   packed[1] = pixel >> 8;
+   packed[2] = pixel >> 16;
+   packed[3] = pixel >> 24;
+   format_desc->unpack_rgba_float(color, 0, packed, 0, 1, 1);
 }
 
 static boolean
@@ -311,7 +311,7 @@ bind_shaders(struct exa_context *exa, int op,
             vs_traits |= VS_SOLID_FILL;
             debug_assert(pSrcPicture->format == PICT_a8r8g8b8);
             pixel_to_float4(pSrcPicture->pSourcePict->solidFill.color,
-                            exa->solid_color);
+                            exa->solid_color, PIPE_FORMAT_B8G8R8A8_UNORM);
             exa->has_solid_color = TRUE;
          } else {
             debug_assert("!gradients not supported");
@@ -533,7 +533,7 @@ boolean xorg_solid_bind_state(struct exa_context *exa,
    unsigned vs_traits, fs_traits;
    struct xorg_shader shader;
 
-   pixel_to_float4(fg, exa->solid_color);
+   pixel_to_float4(fg, exa->solid_color, pixmap->tex->format);
    exa->has_solid_color = TRUE;
 
 #if 0

From 7ed14bec112e20002b18bea1caf11f17318d9f06 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Fri, 26 Aug 2011 21:45:26 +0200
Subject: [PATCH 597/600] pp: initialize the sample mask

We cannot rely on pipe drivers to default to non-zero.

Fixes pp being a no-op on nv50.

Reviewed-by: Lauri Kasanen <cand@gmx.com>
---
 src/gallium/auxiliary/postprocess/pp_program.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/auxiliary/postprocess/pp_program.c b/src/gallium/auxiliary/postprocess/pp_program.c
index 6ec8625dc1b..b92ac80a5db 100644
--- a/src/gallium/auxiliary/postprocess/pp_program.c
+++ b/src/gallium/auxiliary/postprocess/pp_program.c
@@ -133,5 +133,7 @@ pp_init_prog(struct pp_queue_t *ppq, struct pipe_screen *pscreen)
    p->surf.usage = PIPE_BIND_RENDER_TARGET;
    p->surf.format = PIPE_FORMAT_B8G8R8A8_UNORM;
 
+   p->pipe->set_sample_mask(p->pipe, ~0);
+
    return p;
 }

From 0295ac9c8e53c7ec39d18b86db3cda9092f905cb Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 26 Aug 2011 13:56:39 -0600
Subject: [PATCH 598/600] svga: include LLVM in name string in debug builds

---
 src/gallium/drivers/svga/svga_screen.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index e0c11590df0..4a60f635825 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -71,13 +71,22 @@ svga_get_vendor( struct pipe_screen *pscreen )
 static const char *
 svga_get_name( struct pipe_screen *pscreen )
 {
+   const char *build = "", *llvm = "", *mutex = "";
+   static char name[100];
 #ifdef DEBUG
    /* Only return internal details in the DEBUG version:
     */
-   return "SVGA3D; build: DEBUG; mutex: " PIPE_ATOMIC;
-#else
-   return "SVGA3D; build: RELEASE; ";
+   build = "build: DEBUG;";
+   mutex = "mutex: " PIPE_ATOMIC ";";
+#ifdef HAVE_LLVM
+   llvm = "LLVM;";
 #endif
+#else
+   build = "build: RELEASE;";
+#endif
+
+   util_snprintf(name, sizeof(name), "SVGA3D; %s %s %s", build, mutex, llvm);
+   return name;
 }
 
 

From 50da22ceb16a7398612a8b5b7036ef2e0b635eef Mon Sep 17 00:00:00 2001
From: Lauri Kasanen <cand@gmx.com>
Date: Thu, 25 Aug 2011 23:26:17 +0300
Subject: [PATCH 599/600] docs: Add a page on post-processing

With edits by Brian.

Signed-off-by: Lauri Kasanen <cand@gmx.com>
Signed-off-by: Brian Paul <brianp@vmware.com>
---
 docs/contents.html    |  1 +
 docs/postprocess.html | 56 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+)
 create mode 100644 docs/postprocess.html

diff --git a/docs/contents.html b/docs/contents.html
index 8fc2ac0da9f..46e458ee174 100644
--- a/docs/contents.html
+++ b/docs/contents.html
@@ -63,6 +63,7 @@ a:visited {
 <LI><A HREF="extensions.html" target="MainFrame">Mesa Extensions</A>
 <LI><A HREF="mangling.html" target="MainFrame">Function Name Mangling</A>
 <LI><A href="llvmpipe.html" target="MainFrame">Gallium llvmpipe driver</A>
+<LI><A href="postprocess.html" target="MainFrame">Gallium post-processing</A>
 </ul>
 
 <b>Developer Topics</b>
diff --git a/docs/postprocess.html b/docs/postprocess.html
new file mode 100644
index 00000000000..2a379694298
--- /dev/null
+++ b/docs/postprocess.html
@@ -0,0 +1,56 @@
+<HTML>
+
+<TITLE>Gallium Post-processing</TITLE>
+
+<link rel="stylesheet" type="text/css" href="mesa.css"></head>
+
+<BODY>
+
+<H1>Gallium Post-processing</H1>
+
+<p>
+The Gallium drivers support user-defined image post-processing.
+At the end of drawing a frame a post-processing filter can be applied to
+the rendered image.
+Example filters include morphological antialiasing and cell shading.
+</p>
+
+<p>
+The filters can be toggled per-app via driconf, or per-session via the
+corresponding environment variables.
+</p>
+
+<p>
+Multiple filters can be used together.
+</p>
+
+
+<H2>PP environment variables</H2>
+
+<ul>
+<li>PP_DEBUG - If defined debug information will be printed to stderr.
+</ul>
+
+<h2>Current filters</h2>
+
+<ul>
+<li>pp_nored, pp_nogreen, pp_noblue - set to 1 to remove the corresponding color channel.
+These are basic filters for easy testing of the PP queue.
+<li>pp_jimenezmlaa, pp_jimenezmlaa_color -
+<a href="http://www.iryokufx.com/mlaa/" target=_blank>Jimenez's MLAA</a>
+is a morphological antialiasing filter.
+The two versions use depth and color data, respectively.
+Which works better depends on the app - depth will not blur text, but it will
+miss transparent textures for example.
+Set to a number from 2 to 32, roughly corresponding to quality.
+Numbers higher than 8 see minimizing gains.
+<li>pp_celshade - set to 1 to enable cell shading (a more complex color filter).
+</ul>
+
+
+<br>
+<br>
+
+
+</BODY>
+</HTML>

From e3b0e3776646d0367206e4544229622eb22fe9f8 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 26 Aug 2011 14:16:20 -0600
Subject: [PATCH 600/600] g3dvl: use pointer_to_uintptr() to silence a cast
 warning

---
 src/gallium/auxiliary/vl/vl_vlc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/vl/vl_vlc.h b/src/gallium/auxiliary/vl/vl_vlc.h
index 17a7b650c09..4db1334d6a4 100644
--- a/src/gallium/auxiliary/vl/vl_vlc.h
+++ b/src/gallium/auxiliary/vl/vl_vlc.h
@@ -33,6 +33,7 @@
 #include <pipe/p_compiler.h>
 
 #include <util/u_math.h>
+#include "util/u_pointer.h"
 
 struct vl_vlc
 {
@@ -98,7 +99,7 @@ vl_vlc_init(struct vl_vlc *vlc, const uint8_t *data, unsigned len)
    vlc->valid_bits = 0;
 
    /* align the data pointer */
-   while((uint64_t)data & 3) {
+   while (pointer_to_uintptr(data) & 3) {
       vlc->buffer |= (uint64_t)*data << (56 - vlc->valid_bits);
       ++data;
       --len;