diff --git a/bin/mklib b/bin/mklib index 08ef99ec105..c2760e5d892 100755 --- a/bin/mklib +++ b/bin/mklib @@ -494,13 +494,16 @@ case $ARCH in OPTS="${OPTS} -Wl,-Mmapfile.scope" fi - # Check if objects are SPARC v9 + # Check if objects are 64-bit # file says: ELF 64-bit MSB relocatable SPARCV9 Version 1 set ${OBJECTS} if [ ${LINK} = "cc" -o ${LINK} = "CC" ] ; then - SPARCV9=`file $1 | grep SPARCV9` - if [ "${SPARCV9}" ] ; then - OPTS="${OPTS} -xarch=v9" + ABI64=`file $1 | grep "ELF 64-bit"` + if [ "${ABI64}" ] ; then + case `uname -p` in + sparc) OPTS="${OPTS} -xarch=v9" ;; + i386) OPTS="${OPTS} -xarch=amd64" ;; + esac fi fi if [ "${ALTOPTS}" ] ; then diff --git a/configs/default b/configs/default index a6a92efe8fb..8fbf8dd219a 100644 --- a/configs/default +++ b/configs/default @@ -100,7 +100,7 @@ GALLIUM_DIRS = auxiliary drivers state_trackers GALLIUM_AUXILIARIES = $(TOP)/src/gallium/auxiliary/libgallium.a GALLIUM_DRIVERS_DIRS = softpipe failover svga i915 i965 r300 trace identity GALLIUM_DRIVERS = $(foreach DIR,$(GALLIUM_DRIVERS_DIRS),$(TOP)/src/gallium/drivers/$(DIR)/lib$(DIR).a) -GALLIUM_WINSYS_DIRS = drm null xlib +GALLIUM_WINSYS_DIRS = null xlib drm GALLIUM_TARGET_DIRS = libgl-xlib GALLIUM_WINSYS_DRM_DIRS = swrast GALLIUM_STATE_TRACKERS_DIRS = glx vega diff --git a/configs/linux-dri b/configs/linux-dri index 04bcfd859d1..d362fd8b377 100644 --- a/configs/linux-dri +++ b/configs/linux-dri @@ -58,7 +58,7 @@ PROGRAM_DIRS := egl $(PROGRAM_DIRS) EGL_DRIVERS_DIRS = glx DRIVER_DIRS = dri -GALLIUM_WINSYS_DIRS = drm +GALLIUM_WINSYS_DIRS = null xlib drm GALLIUM_TARGET_DIRS = GALLIUM_WINSYS_DRM_DIRS = vmware intel i965 GALLIUM_STATE_TRACKERS_DIRS = egl diff --git a/configure.ac b/configure.ac index 70e158f5d7a..ed47f428c9e 100644 --- a/configure.ac +++ b/configure.ac @@ -22,6 +22,8 @@ LIBDRM_REQUIRED=2.4.15 LIBDRM_RADEON_REQUIRED=2.4.17 DRI2PROTO_REQUIRED=2.1 GLPROTO_REQUIRED=1.4.11 +LIBDRM_XORG_REQUIRED=2.4.17 +LIBKMS_XORG_REQUIRED=1.0.0 dnl Check for progs AC_PROG_CPP @@ 
-458,6 +460,7 @@ CORE_DIRS="glsl mesa" SRC_DIRS="glew" GLU_DIRS="sgi" GALLIUM_DIRS="auxiliary drivers state_trackers" +GALLIUM_TARGET_DIRS="" GALLIUM_WINSYS_DIRS="" GALLIUM_WINSYS_DRM_DIRS="" GALLIUM_DRIVERS_DIRS="softpipe failover trace identity" @@ -467,11 +470,12 @@ case "$mesa_driver" in xlib) DRIVER_DIRS="x11" GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS xlib" + GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS libgl-xlib" ;; dri) SRC_DIRS="$SRC_DIRS glx" DRIVER_DIRS="dri" - GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS drm" + GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS xlib drm" ;; osmesa) DRIVER_DIRS="osmesa" @@ -481,6 +485,7 @@ AC_SUBST([SRC_DIRS]) AC_SUBST([GLU_DIRS]) AC_SUBST([DRIVER_DIRS]) AC_SUBST([GALLIUM_DIRS]) +AC_SUBST([GALLIUM_TARGET_DIRS]) AC_SUBST([GALLIUM_WINSYS_DIRS]) AC_SUBST([GALLIUM_WINSYS_DRM_DIRS]) AC_SUBST([GALLIUM_DRIVERS_DIRS]) @@ -1200,7 +1205,7 @@ yes) GALLIUM_STATE_TRACKERS_DIRS="$GALLIUM_STATE_TRACKERS_DIRS egl" fi # Have only tested st/xorg on 1.6.0 servers - PKG_CHECK_MODULES(XORG, [xorg-server >= 1.6.0], + PKG_CHECK_MODULES(XORG, [xorg-server >= 1.6.0 libdrm >= $LIBDRM_XORG_REQUIRED libkms >= $LIBKMS_XORG_REQUIRED], HAVE_XORG="yes"; GALLIUM_STATE_TRACKERS_DIRS="$GALLIUM_STATE_TRACKERS_DIRS xorg", HAVE_XORG="no") ;; @@ -1220,9 +1225,9 @@ yes) fi ;; xorg) - PKG_CHECK_MODULES(XEXT, [xextproto >= 7.0.99.1], - HAVE_XEXTPROTO_71="yes"; DEFINES="$DEFINES -DHAVE_XEXTPROTO_71", - HAVE_XEXTPROTO_71="no") + PKG_CHECK_MODULES([LIBDRM_XORG], [libdrm >= $LIBDRM_XORG_REQUIRED]) + PKG_CHECK_MODULES([LIBKMS_XORG], [libkms >= $LIBKMS_XORG_REQUIRED]) + HAVE_XORG="yes" ;; es) # mesa/es is required to build es state tracker @@ -1234,6 +1239,12 @@ yes) ;; esac +if test "x$HAVE_XORG" = xyes; then + PKG_CHECK_MODULES(XEXT, [xextproto >= 7.0.99.1], + HAVE_XEXTPROTO_71="yes"; DEFINES="$DEFINES -DHAVE_XEXTPROTO_71", + HAVE_XEXTPROTO_71="no") +fi + AC_ARG_WITH([egl-displays], [AS_HELP_STRING([--with-egl-displays@<:@=DIRS...@:>@], [comma delimited native displays libEGL 
supports, e.g. @@ -1415,6 +1426,7 @@ echo "" if echo "$SRC_DIRS" | grep 'gallium' >/dev/null 2>&1; then echo " Gallium: yes" echo " Gallium dirs: $GALLIUM_DIRS" + echo " Target dirs: $GALLIUM_TARGET_DIRS" echo " Winsys dirs: $GALLIUM_WINSYS_DIRS" echo " Winsys drm dirs:$GALLIUM_WINSYS_DRM_DIRS" echo " Driver dirs: $GALLIUM_DRIVERS_DIRS" diff --git a/docs/README.3DFX b/docs/README.3DFX index 037e8fa7cc6..7feda6f33f7 100644 --- a/docs/README.3DFX +++ b/docs/README.3DFX @@ -644,7 +644,7 @@ Hints and Special Features: - The Voodoo driver supports the GL_EXT_paletted_texture. it works only with GL_COLOR_INDEX8_EXT, GL_RGBA palettes and the alpha value - is ignored because this is a limitation of the the current Glide + is ignored because this is a limitation of the current Glide version and of the Voodoo hardware. See Mesa-3.1/3Dfx/demos/paltex.c for a demo of this extension. diff --git a/docs/egl.html b/docs/egl.html index 82cc06600bd..55907f6cfac 100644 --- a/docs/egl.html +++ b/docs/egl.html @@ -28,7 +28,7 @@ cards.

  1. -

    Run configure with the desired state trackers and and enable +

    Run configure with the desired state trackers and enable the Gallium driver for your hardware. For example

    diff --git a/progs/demos/stex3d.c b/progs/demos/stex3d.c
    index c0bbea0960f..de18480c25e 100644
    --- a/progs/demos/stex3d.c
    +++ b/progs/demos/stex3d.c
    @@ -36,6 +36,7 @@ static int tex_width=64, tex_height=64, tex_depth=64;
     static float angx=0, angy=0, angz=0;
     static int texgen = 2, animate = 1, smooth = 1, wireframe = 0;
     static int CurTexture = NOISE_TEXTURE, CurObject = TORUS;
    +static GLenum Filter = GL_LINEAR;
     
     
     static void
    @@ -298,8 +299,6 @@ create3Dtexture(void)
        printf("setting up 3d texture...\n");
     
        glBindTexture(GL_TEXTURE_3D, NOISE_TEXTURE);
    -   glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    -   glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
        glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_S, GL_REPEAT);
        glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_T, GL_REPEAT);
        glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_R, GL_REPEAT);
    @@ -406,6 +405,9 @@ drawScene(void)
           glDisable(GL_TEXTURE_GEN_R);
        }
     
    +   glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, Filter);
    +   glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, Filter);
    +
        glCallList(CurObject);
        glPopMatrix();
     
    @@ -505,6 +507,12 @@ KeyHandler(unsigned char key, int x, int y)
           else
              CurObject = TORUS;
           break;
    +   case 'f':
    +      if (Filter == GL_LINEAR)
    +         Filter = GL_NEAREST;
    +      else
    +         Filter = GL_LINEAR;
    +      break;
        case 'i':
           if (CurTexture == NOISE_TEXTURE)
              CurTexture = GRADIENT_TEXTURE;
    @@ -513,6 +521,7 @@ KeyHandler(unsigned char key, int x, int y)
           glBindTexture(GL_TEXTURE_3D, CurTexture);
           break;
        case 'a':
    +   case ' ':
           animate = !animate;
           if (animate)
              glutIdleFunc(Idle);
    @@ -559,8 +568,6 @@ create3Dgradient(void)
     
     
        glBindTexture(GL_TEXTURE_3D, GRADIENT_TEXTURE);
    -   glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    -   glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
        glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_S, GL_REPEAT);
        glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_T, GL_REPEAT);
        glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_R, GL_REPEAT);
    diff --git a/progs/egl/Makefile b/progs/egl/Makefile
    index c003cf3cc55..25de6e1f703 100644
    --- a/progs/egl/Makefile
    +++ b/progs/egl/Makefile
    @@ -57,13 +57,13 @@ peglgears: peglgears.o $(HEADERS) $(LIB_DEP)
     	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) $(LIBDRM_LIB) -lm
     
     xeglgears: xeglgears.o $(HEADERS) $(LIB_DEP)
    -	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) -lX11 -lm
    +	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) -lm -L$(libdir) -lX11
     
     xeglthreads: xeglthreads.o $(HEADERS) $(LIB_DEP)
    -	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) -lX11 -lm
    +	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) -lm -L$(libdir) -lX11
     
     xegl_tri: xegl_tri.o $(HEADERS) $(LIB_DEP)
    -	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) -lX11
    +	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) -lm -L$(libdir) -lX11
     
     clean:
     	-rm -f *.o *~
    diff --git a/progs/fpglsl/dowhile.glsl b/progs/fpglsl/dowhile.glsl
    new file mode 100644
    index 00000000000..ed9d7296753
    --- /dev/null
    +++ b/progs/fpglsl/dowhile.glsl
    @@ -0,0 +1,8 @@
    +void main() { // do-while test: body executes once, then leaves via break
    +    float sum = 0.0;
    +    do {
    +	sum += 0.1;
    +	break; // unconditional, so the while(true) condition is never what ends the loop
    +    } while (true);
    +    gl_FragColor = vec4(sum); // all four components are set to sum (0.1 after the single pass)
    +}
    diff --git a/progs/fpglsl/dowhile2.glsl b/progs/fpglsl/dowhile2.glsl
    new file mode 100644
    index 00000000000..f3e00b8e865
    --- /dev/null
    +++ b/progs/fpglsl/dowhile2.glsl
    @@ -0,0 +1,10 @@
    +void main() { // do-while test combining continue and break
    +    float sum = 0.0;
    +    do {
    +	sum += 0.1;
    +	if (sum < 0.499999)
    +	    continue; // skips the break below and goes around the loop again
    +	break; // reached once sum is no longer below 0.499999 (nominally after five additions)
    +    } while (true);
    +    gl_FragColor = vec4(sum); // all four components are set to the final sum
    +}
    diff --git a/progs/fpglsl/for.glsl b/progs/fpglsl/for.glsl
    new file mode 100644
    index 00000000000..862ca8bd6cc
    --- /dev/null
    +++ b/progs/fpglsl/for.glsl
    @@ -0,0 +1,11 @@
    +uniform int KernelSizeInt; // loop trip count, supplied by the host program
    +
    +void main() { // for-loop test: green grows by 0.25 per iteration
    +    int i;
    +    vec4 sum = vec4(0.0);
    +    for (i = 0; i < KernelSizeInt; ++i) {
    +	sum.g += 0.25;
    +    }
    +    sum.a = 1.0; // alpha is forced to 1.0 regardless of the loop
    +    gl_FragColor = sum; // red and blue stay 0.0; green is 0.25 * KernelSizeInt
    +}
    diff --git a/progs/fpglsl/forbreak.glsl b/progs/fpglsl/forbreak.glsl
    new file mode 100644
    index 00000000000..0b8d957cb04
    --- /dev/null
    +++ b/progs/fpglsl/forbreak.glsl
    @@ -0,0 +1,13 @@
    +uniform int KernelSizeInt; // upper bound on the loop trip count, supplied by the host program
    +
    +void main() { // for-loop test with an early break
    +    int i;
    +    vec4 sum = vec4(0.0);
    +    for (i = 0; i < KernelSizeInt; ++i) {
    +	sum.g += 0.25;
    +        if (i > 0)
    +           break; // taken on the second pass (i == 1), so at most two additions happen
    +    }
    +    sum.a = 1.0; // alpha is forced to 1.0 regardless of the loop
    +    gl_FragColor = sum; // green ends at no more than 0.5
    +}
    diff --git a/progs/fpglsl/fp-tri.c b/progs/fpglsl/fp-tri.c
    index c9b08fbbad7..8af09845dd8 100644
    --- a/progs/fpglsl/fp-tri.c
    +++ b/progs/fpglsl/fp-tri.c
    @@ -128,6 +128,11 @@ static void setup_uniforms()
     
        }
     
    +   {
    +      GLint loci = glGetUniformLocationARB(program, "KernelSizeInt");
    +      if (loci >= 0)
    +         glUniform1i(loci, 4);
    +   }
        {
           GLint loc1f = glGetUniformLocationARB(program, "KernelValue1f");
           GLint loc2f = glGetUniformLocationARB(program, "KernelValue2f");
    diff --git a/progs/fpglsl/simpleif.glsl b/progs/fpglsl/simpleif.glsl
    new file mode 100644
    index 00000000000..922421b4108
    --- /dev/null
    +++ b/progs/fpglsl/simpleif.glsl
    @@ -0,0 +1,6 @@
    +void main() { // minimal if-statement test
    +    // this should always be true
    +    if (gl_FragCoord.x >= 0.0) {
    +	gl_FragColor = vec4(0.5, 0.0, 0.5, 1.0); // half red, half blue, opaque
    +    } // there is no else branch: gl_FragColor is not written if the test fails
    +}
    diff --git a/progs/fpglsl/while.glsl b/progs/fpglsl/while.glsl
    new file mode 100644
    index 00000000000..05fb860ddcf
    --- /dev/null
    +++ b/progs/fpglsl/while.glsl
    @@ -0,0 +1,7 @@
    +void main() { // while-loop test with a floating-point exit condition
    +    float sum = 0.0;
    +    while (sum < 0.499999) { // bound sits just under 0.5
    +	sum += 0.1;
    +    }
    +    gl_FragColor = vec4(sum); // all four components are set to the final sum
    +}
    diff --git a/progs/fpglsl/while2.glsl b/progs/fpglsl/while2.glsl
    new file mode 100644
    index 00000000000..19c8904e283
    --- /dev/null
    +++ b/progs/fpglsl/while2.glsl
    @@ -0,0 +1,9 @@
    +void main() { // while(true) test that can only terminate through break
    +    float sum = 0.0;
    +    while (true) {
    +	sum += 0.1;
    +        if (sum > 0.8)
    +           break; // the only exit from the loop
    +    }
    +    gl_FragColor = vec4(sum); // all four components are set to the first sum that exceeded 0.8
    +}
    diff --git a/progs/objviewer/glm.c b/progs/objviewer/glm.c
    index 7c964e489d1..77e62bfab11 100644
    --- a/progs/objviewer/glm.c
    +++ b/progs/objviewer/glm.c
    @@ -1041,7 +1041,7 @@ glmFacetNormals(GLMmodel* model)
     
     /* glmVertexNormals: Generates smooth vertex normals for a model.
      * First builds a list of all the triangles each vertex is in.  Then
    - * loops through each vertex in the the list averaging all the facet
    + * loops through each vertex in the list averaging all the facet
      * normals of the triangles each vertex is in.  Finally, sets the
      * normal index in the triangle for the vertex to the generated smooth
      * normal.  If the dot product of a facet normal and the facet normal
    diff --git a/progs/objviewer/glm.h b/progs/objviewer/glm.h
    index 8740b3684df..1a5646fa4c7 100644
    --- a/progs/objviewer/glm.h
    +++ b/progs/objviewer/glm.h
    @@ -153,7 +153,7 @@ glmFacetNormals(GLMmodel* model);
     
     /* glmVertexNormals: Generates smooth vertex normals for a model.
      * First builds a list of all the triangles each vertex is in.  Then
    - * loops through each vertex in the the list averaging all the facet
    + * loops through each vertex in the list averaging all the facet
      * normals of the triangles each vertex is in.  Finally, sets the
      * normal index in the triangle for the vertex to the generated smooth
      * normal.  If the dot product of a facet normal and the facet normal
    diff --git a/progs/redbook/.gitignore b/progs/redbook/.gitignore
    index 8ed3efe3e23..60a77523e27 100644
    --- a/progs/redbook/.gitignore
    +++ b/progs/redbook/.gitignore
    @@ -12,29 +12,39 @@ bezmesh
     checker
     clip
     colormat
    +combiner
    +convolution
     cube
    +cubemap
     depthcue
     dof
     double
     drawf
     feedback
     fog
    +fogcoord
     fogindex
     font
     hello
    +histogram
     image
     light
     lines
     list
     material
    +minmax
     mipmap
     model
     movelight
    +multisamp
    +multitex
    +mvarray
     nurbs
     pickdepth
     picksquare
     plane
     planet
    +pointp
     polyoff
     polys
     quadric
    @@ -44,10 +54,12 @@ scene
     scenebamb
     sceneflat
     select
    +shadowmap
     smooth
     stencil
     stroke
     surface
    +surfpoints
     teaambient
     teapots
     tess
    @@ -56,6 +68,7 @@ texbind
     texgen
     texprox
     texsub
    +texture3d
     texturesurf
     torus
     trim
    diff --git a/progs/tests/fbotest2.c b/progs/tests/fbotest2.c
    index 872b46279e6..faf0dd87484 100644
    --- a/progs/tests/fbotest2.c
    +++ b/progs/tests/fbotest2.c
    @@ -33,7 +33,8 @@ CheckError(int line)
     static void
     Display( void )
     {
    -   GLubyte *buffer = malloc(Width * Height * 4);
    +   GLboolean copyPix = GL_FALSE;
    +   GLboolean blitPix = GL_FALSE;
        GLenum status;
     
        CheckError(__LINE__);
    @@ -63,16 +64,43 @@ Display( void )
        glutSolidTeapot(2.0);
        glPopMatrix();
     
    -   /* read from user framebuffer */
    -   glReadPixels(0, 0, Width, Height, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
    +   if (copyPix) {
    +      glBindFramebufferEXT(GL_READ_FRAMEBUFFER_EXT, MyFB);
    +      glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
    +      glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, 0);
    +      glDrawBuffer(GL_BACK);
     
    -   /* draw to window */
    -   glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);
    -   glDisable(GL_DEPTH_TEST);  /* in case window has depth buffer */
    -   glWindowPos2iARB(0, 0);
    -   glDrawPixels(Width, Height, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
    +      glDisable(GL_DEPTH_TEST);  /* in case window has depth buffer */
    +
    +      glWindowPos2iARB(0, 0);
    +      glCopyPixels(0, 0, Width, Height, GL_COLOR);
    +   }
    +   else if (blitPix) {
    +      glBindFramebufferEXT(GL_READ_FRAMEBUFFER_EXT, MyFB);
    +      glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
    +      glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, 0);
    +      glDrawBuffer(GL_BACK);
    +
    +      glDisable(GL_DEPTH_TEST);  /* in case window has depth buffer */
    +
    +      glBlitFramebufferEXT(0, 0, Width, Height,
    +                           0, 0, Width, Height,
    +                           GL_COLOR_BUFFER_BIT, GL_NEAREST);
    +   }
    +   else {
    +      GLubyte *buffer = malloc(Width * Height * 4);
    +      /* read from user framebuffer */
    +      glReadPixels(0, 0, Width, Height, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
    +
    +      /* draw to window */
    +      glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);
    +      glDisable(GL_DEPTH_TEST);  /* in case window has depth buffer */
    +      glWindowPos2iARB(0, 0);
    +      glDrawPixels(Width, Height, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
    +
    +      free(buffer);
    +   }
     
    -   free(buffer);
        glutSwapBuffers();
        CheckError(__LINE__);
     }
    diff --git a/progs/tests/vao-01.c b/progs/tests/vao-01.c
    index e4a89cb19db..ee528d22439 100644
    --- a/progs/tests/vao-01.c
    +++ b/progs/tests/vao-01.c
    @@ -30,7 +30,7 @@
      * it (via \c glPopClientAttrib).  After popping, the state of the VAO is
      * examined.
      * 
    - * According the the APPLE_vertex_array_object spec, the contents of the VAO
    + * According to the APPLE_vertex_array_object spec, the contents of the VAO
      * should be restored to the values that they had when pushed.
      * 
      * \author Ian Romanick 
    diff --git a/progs/tests/vao-02.c b/progs/tests/vao-02.c
    index 9f7f5c27792..c23b4ab05a6 100644
    --- a/progs/tests/vao-02.c
    +++ b/progs/tests/vao-02.c
    @@ -30,7 +30,7 @@
      * it (via \c glPopClientAttrib).  After popping, the state of the VAO is
      * examined.
      * 
    - * According the the APPLE_vertex_array_object spec, the contents of the VAO
    + * According to the APPLE_vertex_array_object spec, the contents of the VAO
      * should be restored to the values that they had when pushed.
      * 
      * \author Ian Romanick 
    diff --git a/progs/trivial/Makefile b/progs/trivial/Makefile
    index b4a903cb68f..a10748f9487 100644
    --- a/progs/trivial/Makefile
    +++ b/progs/trivial/Makefile
    @@ -11,6 +11,7 @@ include $(TOP)/configs/current
     LIBS = -L$(TOP)/$(LIB_DIR) -l $(GLEW_LIB) -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) $(APP_LIB_DEPS)
     
     SOURCES = \
    +	clear-fbo-scissor.c \
     	clear-fbo-tex.c \
     	clear-fbo.c \
     	clear-scissor.c \
    diff --git a/progs/trivial/SConscript b/progs/trivial/SConscript
    index f480da047eb..24b4f91fb0a 100644
    --- a/progs/trivial/SConscript
    +++ b/progs/trivial/SConscript
    @@ -1,7 +1,8 @@
     Import('*')
     
     progs = [
    -    'clear-fbo-tex',
    +	'clear-fbo-scissor',
    +	'clear-fbo-tex',
     	'clear-fbo',
     	'clear-scissor',
     	'clear-undefined',
    diff --git a/progs/trivial/clear-fbo-scissor.c b/progs/trivial/clear-fbo-scissor.c
    new file mode 100644
    index 00000000000..6a605e16a8b
    --- /dev/null
    +++ b/progs/trivial/clear-fbo-scissor.c
    @@ -0,0 +1,234 @@
    +/*
    + * Use scissor to clear the four quadrants of the FBO to different
    + * colors.  Then draw a grey triangle in the middle.
    + */
    +
    +
    +#include <assert.h>
    +#include <math.h>
    +#include <stddef.h>
    +#include <stdio.h>
    +#include <stdlib.h>
    +#include <string.h>
    +#include <GL/glew.h>
    +#include <GL/glut.h>
    +
    +
    +static int Width = 512, Height = 512;
    +static GLuint MyFB, MyRB;
    +static GLboolean UseTex = GL_FALSE;
    +static GLboolean UseCopyPix = GL_FALSE;
    +
    +/* Fetch the pending GL error; if there is one, print its GLU description, then assert that there was none. */
    +#define CheckError() \
    +   do { \
    +      GLenum err = glGetError(); \
    +      if (err != GL_NO_ERROR) \
    +         printf("Error: %s\n", gluErrorString(err)); \
    +      assert(err == GL_NO_ERROR); \
    +   } while (0)
    +
    +/* One-time setup: print the GL strings, require GL_EXT_framebuffer_object, and give MyFB a color attachment. */
    +static void
    +Init(void)
    +{
    +   GLenum status;
    +
    +   fprintf(stderr, "GL_RENDERER   = %s\n", (char *) glGetString(GL_RENDERER));
    +   fprintf(stderr, "GL_VERSION    = %s\n", (char *) glGetString(GL_VERSION));
    +   fprintf(stderr, "GL_VENDOR     = %s\n", (char *) glGetString(GL_VENDOR));
    +   fflush(stderr);
    +
    +   if (!glutExtensionSupported("GL_EXT_framebuffer_object")) {
    +      printf("GL_EXT_framebuffer_object not found!\n");
    +      exit(0); /* note: exit status is 0 even though the required extension is missing */
    +   }
    +
    +   glGenFramebuffersEXT(1, &MyFB);
    +   glGenRenderbuffersEXT(1, &MyRB); /* generated in both modes, attached only in the else branch below */
    +
    +   glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, MyFB);
    +
    +   if (UseTex) { /* -t: color attachment is a Width x Height RGBA texture */
    +      GLuint tex;
    +      glGenTextures(1, &tex);
    +      glBindTexture(GL_TEXTURE_2D, tex);
    +      glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, Width, Height, 0,
    +                   GL_RGBA, GL_UNSIGNED_BYTE, NULL);
    +      glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    +      glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    +      glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, 
    +                                GL_COLOR_ATTACHMENT0_EXT,
    +                                GL_TEXTURE_2D, tex, 0);
    +   }
    +   else { /* default: color attachment is renderbuffer MyRB */
    +      glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, MyRB);
    +
    +      glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT,
    +                                   GL_COLOR_ATTACHMENT0_EXT,
    +                                   GL_RENDERBUFFER_EXT, MyRB);
    +
    +      glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_RGB, Width, Height);
    +   }
    +
    +   status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
    +   if (status != GL_FRAMEBUFFER_COMPLETE_EXT) {
    +      fprintf(stderr, "Framebuffer object is incomplete (0x%x)!\n", status); /* reported only; execution continues */
    +   }
    +
    +   glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0); /* leave framebuffer 0 bound on return */
    +}
    +
    +/* GLUT reshape callback: reset viewport and projection, record the new size, and resize the renderbuffer storage. */
    +static void
    +Reshape(int width, int height)
    +{
    +   glViewport(0, 0, width, height);
    +   glMatrixMode(GL_PROJECTION);
    +   glLoadIdentity();
    +   glOrtho(-1.0, 1.0, -1.0, 1.0, -1.0, 1.0); /* coordinates span -1..1 on every axis */
    +   glMatrixMode(GL_MODELVIEW);
    +
    +   Width = width;
    +   Height = height;
    +   if (!UseTex) { /* in texture mode nothing is reallocated here, so the texture keeps its Init-time size */
    +      glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_RGB, Width, Height); /* NOTE(review): no bind here; presumably relies on MyRB still being bound from Init -- confirm */
    +   }
    +}
    +
    +/* GLUT keyboard callback: key code 27 (Escape) exits the program; every other key requests a redraw. */
    +static void
    +Key(unsigned char key, int x, int y) /* x and y are not used */
    +{
    +   if (key == 27) {
    +      exit(0);
    +   }
    +   glutPostRedisplay();
    +}
    +
    +/* GLUT display callback: scissor-clear the FBO's four quadrants, draw a triangle into it, then copy it to the window. */
    +static void
    +Draw(void)
    +{
    +   GLboolean scissor = GL_TRUE; /* never changed, so the non-scissor else branch below is not taken */
    +
    +   /* draw to user framebuffer */
    +   glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, MyFB);
    +   glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
    +   glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
    +
    +   glViewport(0, 0, Width, Height);
    +   CheckError();
    +
    +   if (scissor) {
    +      glEnable(GL_SCISSOR_TEST);
    +
    +      /* lower-left = red */
    +      glClearColor(1, 0, 0, 0);
    +      glScissor(0, 0, Width / 2, Height / 2);
    +      glClear(GL_COLOR_BUFFER_BIT); 
    +
    +      /* lower-right = green */
    +      glClearColor(0, 1, 0, 0);
    +      glScissor(Width / 2, 0, Width - Width / 2, Height / 2); /* Width - Width / 2 picks up the extra column when Width is odd */
    +      glClear(GL_COLOR_BUFFER_BIT); 
    +
    +      /* upper-left = blue */
    +      glClearColor(0, 0, 1, 0);
    +      glScissor(0, Height / 2, Width / 2, Height - Height / 2); /* Height - Height / 2 picks up the extra row when Height is odd */
    +      glClear(GL_COLOR_BUFFER_BIT); 
    +
    +      /* upper-right = white */
    +      glClearColor(1, 1, 1, 0);
    +      glScissor(Width / 2, Height / 2, Width - Width / 2, Height - Height / 2);
    +      glClear(GL_COLOR_BUFFER_BIT); 
    +
    +      glDisable(GL_SCISSOR_TEST);
    +   }
    +   else {
    +      glClearColor(0, 1, 0, 0);
    +      glClear(GL_COLOR_BUFFER_BIT);
    +   }
    +
    +   CheckError();
    +
    +   /* gray triangle in middle, pointing up */
    +   glColor3f(0.5, 0.5, 0.5);
    +   glBegin(GL_TRIANGLES);
    +   glVertex2f(Width/4, Height/4);       /* NOTE(review): these three vertices use pixel-scale values, */
    +   glVertex2f(Width*3/4, Height/4);     /* but Reshape sets up a -1..1 ortho projection; with GL_TRIANGLES they form */
    +   glVertex2f(Width/2, Height*3/4);     /* a separate first triangle, presumably left over and off-screen -- confirm */
    +   glVertex2f(-0.5, -0.5);              /* second triangle: coordinates inside the -1..1 range */
    +   glVertex2f(+0.5, -0.5);
    +   glVertex2f( 0.0, 0.7);
    +   glEnd();
    +
    +   CheckError();
    +
    +   /* copy fbo to window */
    +   glBindFramebufferEXT(GL_READ_FRAMEBUFFER_EXT, MyFB);
    +   glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
    +   glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, 0);
    +   glDrawBuffer(GL_BACK);
    +   
    +   if (UseCopyPix) { /* -c: copy with glCopyPixels using the read/draw bindings made above */
    +      glWindowPos2i(0, 0);
    +      glCopyPixels(0, 0, Width, Height, GL_COLOR);
    +   }
    +   else { /* default: round-trip the pixels through a temporary client-side buffer */
    +      GLubyte *buffer = malloc(Width * Height * 4); /* 4 bytes per pixel (RGBA); NOTE(review): result is not checked for NULL */
    +
    +      /* read from user framebuffer */
    +      glReadPixels(0, 0, Width, Height, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
    +
    +      /* draw to window */
    +      glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);
    +      glWindowPos2iARB(0, 0);
    +      glDrawPixels(Width, Height, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
    +
    +      free(buffer);
    +   }
    +
    +   /* Bind normal framebuffer */
    +   glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);
    +
    +   glutSwapBuffers();
    +
    +   CheckError();
    +}
    +
    +/* Entry point: parse -t (render to texture) and -c (glCopyPixels path), create the window, hook up callbacks, run GLUT. */
    +int
    +main(int argc, char *argv[])
    +{
    +   int i;
    +
    +   glutInit(&argc, argv); /* called before the option loop below */
    +   glutInitWindowPosition(100, 0);
    +   glutInitWindowSize(Width, Height);
    +   glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE);
    +
    +   for (i = 1; i < argc; i++) { /* arguments other than -t and -c are silently ignored */
    +      if (strcmp(argv[i], "-t") == 0)
    +         UseTex = GL_TRUE;
    +      else if (strcmp(argv[i], "-c") == 0)
    +         UseCopyPix = GL_TRUE;
    +   }
    +
    +   if (UseTex)
    +      printf("Using render to texture\n");
    +   else
    +      printf("Using user-created render buffer\n");
    +
    +   if (!glutCreateWindow(argv[0])) {
    +      exit(1); /* window creation failed */
    +   }
    +
    +   glewInit(); /* return value is not checked */
    +   Init(); /* runs after the window exists and after glewInit */
    +   glutReshapeFunc(Reshape);
    +   glutKeyboardFunc(Key);
    +   glutDisplayFunc(Draw);
    +   glutMainLoop();
    +   return 0;
    +}
    diff --git a/progs/xdemos/.gitignore b/progs/xdemos/.gitignore
    index a65b890d3dc..2f5e909079a 100644
    --- a/progs/xdemos/.gitignore
    +++ b/progs/xdemos/.gitignore
    @@ -13,11 +13,14 @@ glxpixmap
     glxsnoop
     glxswapcontrol
     manywin
    +msctest
     multictx
     offset
    +omlsync
     overlay
    -pbdemo
     pbinfo
    +pbdemo
    +shape
     sharedtex
     sharedtex_mt
     texture_from_pixmap
    @@ -26,5 +29,3 @@ xdemo
     xfont
     xrotfontdemo
     yuvrect_client
    -msctest
    -omlsync
    diff --git a/progs/xdemos/Makefile b/progs/xdemos/Makefile
    index 9cf984b59e5..e87d55d011e 100644
    --- a/progs/xdemos/Makefile
    +++ b/progs/xdemos/Makefile
    @@ -11,7 +11,7 @@ LIB_DEP = $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME)
     # Add X11 and pthread libs to satisfy GNU gold.
     APP_LIB_DEPS += -lX11 -lpthread
     
    -LIBS = -L$(TOP)/$(LIB_DIR) -l$(GL_LIB) $(APP_LIB_DEPS)
    +LIBS = -L$(TOP)/$(LIB_DIR) -l$(GL_LIB) -L$(libdir) $(APP_LIB_DEPS)
     
     PROGS = \
     	corender \
    diff --git a/scons/llvm.py b/scons/llvm.py
    index c8e50c5916d..01eae2403a7 100644
    --- a/scons/llvm.py
    +++ b/scons/llvm.py
    @@ -82,11 +82,6 @@ def generate(env):
                 print 'scons: could not determine the LLVM version from %s' % llvm_config
                 return
     
    -        if llvm_version >= distutils.version.LooseVersion('2.7'):
    -            print 'scons: Ignoring unsupported LLVM version %s' % llvm_version
    -            print 'scons: See http://www.llvm.org/bugs/show_bug.cgi?id=6429'
    -            return
    -
             env.Prepend(CPPPATH = [os.path.join(llvm_dir, 'include')])
             env.AppendUnique(CPPDEFINES = [
                 '__STDC_LIMIT_MACROS', 
    @@ -138,11 +133,6 @@ def generate(env):
             llvm_version = env.backtick('llvm-config --version').rstrip()
             llvm_version = distutils.version.LooseVersion(llvm_version)
     
    -        if llvm_version >= distutils.version.LooseVersion('2.7'):
    -            print 'scons: Ignoring unsupported LLVM version %s' % llvm_version
    -            print 'scons: See http://www.llvm.org/bugs/show_bug.cgi?id=6429'
    -            return
    -
             try:
                 env.ParseConfig('llvm-config --cppflags')
                 env.ParseConfig('llvm-config --libs jit interpreter nativecodegen bitwriter')
    diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
    index 70d7dbdfc75..f4615064e65 100644
    --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
    +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
    @@ -436,9 +436,9 @@ aaline_create_texture(struct aaline_stage *aaline)
     
           /* This texture is new, no need to flush. 
            */
    -      transfer = screen->get_tex_transfer(screen, aaline->texture, 0, level, 0,
    +      transfer = pipe->get_tex_transfer(pipe, aaline->texture, 0, level, 0,
                                              PIPE_TRANSFER_WRITE, 0, 0, size, size);
    -      data = screen->transfer_map(screen, transfer);
    +      data = pipe->transfer_map(pipe, transfer);
           if (data == NULL)
              return FALSE;
     
    @@ -462,8 +462,8 @@ aaline_create_texture(struct aaline_stage *aaline)
           }
     
           /* unmap */
    -      screen->transfer_unmap(screen, transfer);
    -      screen->tex_transfer_destroy(transfer);
    +      pipe->transfer_unmap(pipe, transfer);
    +      pipe->tex_transfer_destroy(pipe, transfer);
        }
        return TRUE;
     }
    diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
    index e03081d65c5..794fd81d70f 100644
    --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
    +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
    @@ -377,19 +377,21 @@ pstip_update_texture(struct pstip_stage *pstip)
     {
        static const uint bit31 = 1 << 31;
        struct pipe_context *pipe = pstip->pipe;
    -   struct pipe_screen *screen = pipe->screen;
        struct pipe_transfer *transfer;
        const uint *stipple = pstip->state.stipple->stipple;
        uint i, j;
        ubyte *data;
     
        /* XXX: want to avoid flushing just because we use stipple: 
    +    *
    +    * Flush should no longer be necessary if driver is properly
    +    * interleaving drawing and transfers on a given context:
         */
        pipe->flush( pipe, PIPE_FLUSH_TEXTURE_CACHE, NULL );
     
    -   transfer = screen->get_tex_transfer(screen, pstip->texture, 0, 0, 0,
    -                                       PIPE_TRANSFER_WRITE, 0, 0, 32, 32);
    -   data = screen->transfer_map(screen, transfer);
    +   transfer = pipe->get_tex_transfer(pipe, pstip->texture, 0, 0, 0,
    +				     PIPE_TRANSFER_WRITE, 0, 0, 32, 32);
    +   data = pipe->transfer_map(pipe, transfer);
     
        /*
         * Load alpha texture.
    @@ -411,8 +413,8 @@ pstip_update_texture(struct pstip_stage *pstip)
        }
     
        /* unmap */
    -   screen->transfer_unmap(screen, transfer);
    -   screen->tex_transfer_destroy(transfer);
    +   pipe->transfer_unmap(pipe, transfer);
    +   pipe->tex_transfer_destroy(pipe, transfer);
     }
     
     
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_alpha.h b/src/gallium/auxiliary/gallivm/lp_bld_alpha.h
    index 634575670db..fe3cedcc48c 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_alpha.h
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_alpha.h
    @@ -35,7 +35,7 @@
     #define LP_BLD_ALPHA_H
     
     
    -#include <llvm-c/Core.h>
    +#include "os/os_llvm.h"
     
     struct pipe_alpha_state;
     struct lp_type;
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
    index e2c67883972..aa47338b323 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
    @@ -232,6 +232,37 @@ lp_build_add(struct lp_build_context *bld,
     }
     
     
    +/** Return the sum of the elements of a, built as one extract-element per lane chained through add instructions */
    +LLVMValueRef
    +lp_build_sum_vector(struct lp_build_context *bld,
    +                    LLVMValueRef a)
    +{
    +   const struct lp_type type = bld->type;
    +   LLVMValueRef index, res;
    +   int i;
    +
    +   if (a == bld->zero) /* shortcut: skip code generation for the context's zero constant */
    +      return bld->zero; /* NOTE(review): presumably vector-typed, while the main path returns one extracted element -- confirm callers cope */
    +   if (a == bld->undef) /* shortcut: undefined input gives undefined result */
    +      return bld->undef; /* NOTE(review): same vector-versus-element question as above */
    +   assert(type.length > 1); /* the loop below assumes at least two elements */
    +
    +   assert(!bld->type.norm); /* normalized types are rejected here */
    +
    +   index = LLVMConstInt(LLVMInt32Type(), 0, 0);
    +   res = LLVMBuildExtractElement(bld->builder, a, index, ""); /* element 0 seeds the running sum */
    +
    +   for (i = 1; i < type.length; i++) { /* add the remaining elements one at a time, in index order */
    +      index = LLVMConstInt(LLVMInt32Type(), i, 0);
    +      res = LLVMBuildAdd(bld->builder, res, /* NOTE(review): LLVMBuildAdd is used regardless of type.floating -- confirm for float vectors */
    +                         LLVMBuildExtractElement(bld->builder, a, index, ""),
    +                         "");
    +   }
    +
    +   return res;
    +}
    +
    +
     /**
      * Generate a - b
      */
    @@ -689,12 +720,12 @@ lp_build_negate(struct lp_build_context *bld,
     }
     
     
    +/** Return -1, 0 or +1 depending on the sign of a */
     LLVMValueRef
     lp_build_sgn(struct lp_build_context *bld,
                  LLVMValueRef a)
     {
        const struct lp_type type = bld->type;
    -   LLVMTypeRef vec_type = lp_build_vec_type(type);
        LLVMValueRef cond;
        LLVMValueRef res;
     
    @@ -704,14 +735,29 @@ lp_build_sgn(struct lp_build_context *bld,
           res = bld->one;
        }
        else if(type.floating) {
    -      /* Take the sign bit and add it to 1 constant */
    -      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
    -      LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
    +      LLVMTypeRef vec_type;
    +      LLVMTypeRef int_type;
    +      LLVMValueRef mask;
           LLVMValueRef sign;
           LLVMValueRef one;
    -      sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
    +      unsigned long long maskBit = (unsigned long long)1 << (type.width - 1);
    +
    +      if (type.length == 1) {
    +         int_type = lp_build_int_elem_type(type);
    +         vec_type = lp_build_elem_type(type);
    +         mask = LLVMConstInt(int_type, maskBit, 0);
    +      }
    +      else {
    +         /* vector */
    +         int_type = lp_build_int_vec_type(type);
    +         vec_type = lp_build_vec_type(type);
    +         mask = lp_build_int_const_scalar(type, maskBit);
    +      }
    +
    +      /* Take the sign bit and add it to 1 constant */
    +      sign = LLVMBuildBitCast(bld->builder, a, int_type, "");
           sign = LLVMBuildAnd(bld->builder, sign, mask, "");
    -      one = LLVMConstBitCast(bld->one, int_vec_type);
    +      one = LLVMConstBitCast(bld->one, int_type);
           res = LLVMBuildOr(bld->builder, sign, one, "");
           res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
        }
    @@ -883,6 +929,10 @@ lp_build_floor(struct lp_build_context *bld,
     
        assert(type.floating);
     
    +   if (type.length == 1) {
    +      return LLVMBuildFPTrunc(bld->builder, a, LLVMFloatType(), "");
    +   }
    +
        if(util_cpu_caps.has_sse4_1)
           return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
        else {
    @@ -953,6 +1003,9 @@ lp_build_itrunc(struct lp_build_context *bld,
     }
     
     
    +/**
    + * Convert float[] to int[] with round().
    + */
     LLVMValueRef
     lp_build_iround(struct lp_build_context *bld,
                     LLVMValueRef a)
    @@ -1013,6 +1066,14 @@ lp_build_ifloor(struct lp_build_context *bld,
        LLVMValueRef res;
     
        assert(type.floating);
    +
    +   if (type.length == 1) {
    +      /* scalar float to int */
    +      LLVMTypeRef int_type = LLVMIntType(type.width);
    +      res = LLVMBuildFPToSI(bld->builder, a, int_type, "");
    +      return res;
    +   }
    +
        assert(lp_check_value(type, a));
     
        if(util_cpu_caps.has_sse4_1) {
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
    index 55385e3a66a..7a10fe12209 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
    @@ -37,7 +37,7 @@
     #define LP_BLD_ARIT_H
     
     
    -#include   
    +#include "os/os_llvm.h"
     
     
     struct lp_type;
    @@ -56,6 +56,10 @@ lp_build_add(struct lp_build_context *bld,
                  LLVMValueRef a,
                  LLVMValueRef b);
     
    +LLVMValueRef
    +lp_build_sum_vector(struct lp_build_context *bld,
    +                    LLVMValueRef a);
    +
     LLVMValueRef
     lp_build_sub(struct lp_build_context *bld,
                  LLVMValueRef a,
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_blend.h b/src/gallium/auxiliary/gallivm/lp_bld_blend.h
    index da272e549f3..5a9e1c1fb2f 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_blend.h
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_blend.h
    @@ -40,7 +40,7 @@
      * for a standalone example.
      */
     
    -#include   
    +#include "os/os_llvm.h"
      
     #include "pipe/p_format.h"
     
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c
    index 53447757e8e..8a275fa72f3 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_const.c
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c
    @@ -221,8 +221,16 @@ lp_build_undef(struct lp_type type)
     LLVMValueRef
     lp_build_zero(struct lp_type type)
     {
    -   LLVMTypeRef vec_type = lp_build_vec_type(type);
    -   return LLVMConstNull(vec_type);
    +   if (type.length == 1) {   /* one element: return a scalar zero */
    +      if (type.floating)   /* NOTE(review): ignores type.width -- confirm */
    +         return LLVMConstReal(LLVMFloatType(), 0.0);
    +      else
    +         return LLVMConstInt(LLVMIntType(type.width), 0, 0);
    +   }
    +   else {   /* otherwise: null constant of the vector type */
    +      LLVMTypeRef vec_type = lp_build_vec_type(type);
    +      return LLVMConstNull(vec_type);
    +   }
     }
                    
     
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h
    index cb8e1c7b006..40786361031 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_const.h
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h
    @@ -37,7 +37,7 @@
     #define LP_BLD_CONST_H
     
     
    -#include   
    +#include "os/os_llvm.h"
     
     #include 
     
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.h b/src/gallium/auxiliary/gallivm/lp_bld_conv.h
    index 948e68fae4f..78e8155ff73 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.h
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.h
    @@ -37,7 +37,7 @@
     #define LP_BLD_CONV_H
     
     
    -#include   
    +#include "os/os_llvm.h"
     
     
     struct lp_type;
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
    index 583e6132b4b..441ad94786f 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
    @@ -30,7 +30,7 @@
     #define LP_BLD_DEBUG_H
     
     
    -#include 
    +#include "os/os_llvm.h"
     
     #include "pipe/p_compiler.h"
     #include "util/u_string.h"
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_depth.h b/src/gallium/auxiliary/gallivm/lp_bld_depth.h
    index 79d6981bb51..8be80024ae8 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_depth.h
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_depth.h
    @@ -36,7 +36,7 @@
     #define LP_BLD_DEPTH_H
     
     
    -#include   
    +#include "os/os_llvm.h"
     
      
     struct pipe_depth_state;
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
    index bc831389085..106fc03e46f 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
    @@ -308,7 +308,7 @@ lp_build_flow_scope_end(struct lp_build_flow_context *flow)
      * Note: this function has no dependencies on the flow code and could
      * be used elsewhere.
      */
    -static LLVMBasicBlockRef
    +LLVMBasicBlockRef
     lp_build_insert_new_block(LLVMBuilderRef builder, const char *name)
     {
        LLVMBasicBlockRef current_block;
    @@ -648,7 +648,9 @@ lp_build_if(struct lp_build_if_state *ctx,
           ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), "");
     
           /* add add the initial value of the var from the entry block */
    -      LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->entry_block, 1);
    +      if (!LLVMIsUndef(*flow->variables[i]))
    +         LLVMAddIncoming(ifthen->phi[i], flow->variables[i],
    +                         &ifthen->entry_block, 1);
        }
     
        /* create/insert true_block before merge_block */
    @@ -695,18 +697,21 @@ lp_build_endif(struct lp_build_if_state *ctx)
     {
        struct lp_build_flow_context *flow = ctx->flow;
        struct lp_build_flow_if *ifthen;
    +   LLVMBasicBlockRef curBlock = LLVMGetInsertBlock(ctx->builder);
        unsigned i;
     
        ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen;
        assert(ifthen);
     
    +   /* Insert branch to the merge block from current block */
    +   LLVMBuildBr(ctx->builder, ifthen->merge_block);
    +
        if (ifthen->false_block) {
           LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
           /* for each variable, update the Phi node with a (variable, block) pair */
           for (i = 0; i < flow->num_variables; i++) {
              assert(*flow->variables[i]);
    -         LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->false_block, 1);
    -
    +         LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &curBlock, 1);
              /* replace the variable ref with the phi function */
              *flow->variables[i] = ifthen->phi[i];
           }
    @@ -742,15 +747,18 @@ lp_build_endif(struct lp_build_if_state *ctx)
                           ifthen->true_block, ifthen->merge_block);
        }
     
    -   /* Append an unconditional Br(anch) instruction on the true_block */
    -   LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block);
    -   LLVMBuildBr(ctx->builder, ifthen->merge_block);
    +   /* Insert branch from end of true_block to merge_block */
        if (ifthen->false_block) {
    -      /* Append an unconditional Br(anch) instruction on the false_block */
    -      LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block);
    +      /* Append an unconditional Br(anch) instruction on the true_block */
    +      LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block);
           LLVMBuildBr(ctx->builder, ifthen->merge_block);
        }
    -
    +   else {
    +      /* No else clause.
    +       * Note that we've already inserted the branch at the end of
    +       * true_block.  See the very first LLVMBuildBr() call in this function.
    +       */
    +   }
     
        /* Resume building code at end of the ifthen->merge_block */
        LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
    index 4c225a0d4f9..e1588365491 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
    @@ -35,7 +35,7 @@
     #define LP_BLD_FLOW_H
     
     
    -#include   
    +#include "os/os_llvm.h"
     
     
     struct lp_type;
    @@ -145,7 +145,9 @@ lp_build_else(struct lp_build_if_state *ctx);
     
     void
     lp_build_endif(struct lp_build_if_state *ctx);
    -              
    +
    +LLVMBasicBlockRef
    +lp_build_insert_new_block(LLVMBuilderRef builder, const char *name);
     
     
     #endif /* !LP_BLD_FLOW_H */
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h
    index 970bee379f5..8972c0dc178 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_format.h
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h
    @@ -34,7 +34,7 @@
      * Pixel format helpers.
      */
     
    -#include   
    +#include "os/os_llvm.h"
     
     #include "pipe/p_format.h"
     
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_interp.h b/src/gallium/auxiliary/gallivm/lp_bld_interp.h
    index ca958cdf343..177b5e943ee 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_interp.h
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_interp.h
    @@ -41,7 +41,7 @@
     #define LP_BLD_INTERP_H
     
     
    -#include 
    +#include "os/os_llvm.h"
     
     #include "tgsi/tgsi_exec.h"
     
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
    index f813f27074b..7d5506c7338 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
    @@ -37,7 +37,7 @@
     #define LP_BLD_INTR_H
     
     
    -#include   
    +#include "os/os_llvm.h"
     
     
     /**
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
    index 7c585fda788..f3df3dd1388 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
    @@ -42,6 +42,26 @@
     #include "lp_bld_logic.h"
     
     
    +/*
    + * XXX
    + *
    + * Selection with vector conditional like
    + *
    + *    select <4 x i1> %C, %A, %B
    + *
    + * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not
    + * supported on any backend.
    + *
    + * Expanding the boolean vector to full SIMD register width, as in
    + *
    + *    sext <4 x i1> %C to <4 x i32>
    + *
    + * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
    + * it causes assertion failures in LLVM 2.6. It appears to work correctly on 
    + * LLVM 2.7.
    + */
    +
    +
     /**
      * Build code to compare two values 'a' and 'b' of 'type' using the given func.
      * \param func  one of PIPE_FUNC_x
    @@ -54,13 +74,11 @@ lp_build_compare(LLVMBuilderRef builder,
                      LLVMValueRef a,
                      LLVMValueRef b)
     {
    -   LLVMTypeRef vec_type = lp_build_vec_type(type);
        LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
        LLVMValueRef zeros = LLVMConstNull(int_vec_type);
        LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
        LLVMValueRef cond;
        LLVMValueRef res;
    -   unsigned i;
     
        assert(func >= PIPE_FUNC_NEVER);
        assert(func <= PIPE_FUNC_ALWAYS);
    @@ -74,10 +92,12 @@ lp_build_compare(LLVMBuilderRef builder,
     
        /* XXX: It is not clear if we should use the ordered or unordered operators */
     
    +#if HAVE_LLVM < 0x0207
     #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
        if(type.width * type.length == 128) {
           if(type.floating && util_cpu_caps.has_sse) {
              /* float[4] comparison */
    +         LLVMTypeRef vec_type = lp_build_vec_type(type);
              LLVMValueRef args[3];
              unsigned cc;
              boolean swap;
    @@ -147,6 +167,7 @@ lp_build_compare(LLVMBuilderRef builder,
              const char *pcmpgt;
              LLVMValueRef args[2];
              LLVMValueRef res;
    +         LLVMTypeRef vec_type = lp_build_vec_type(type);
     
              switch (type.width) {
              case 8:
    @@ -200,6 +221,7 @@ lp_build_compare(LLVMBuilderRef builder,
           }
        } /* if (type.width * type.length == 128) */
     #endif
    +#endif /* HAVE_LLVM < 0x0207 */
     
        if(type.floating) {
           LLVMRealPredicate op;
    @@ -233,16 +255,19 @@ lp_build_compare(LLVMBuilderRef builder,
              return lp_build_undef(type);
           }
     
    -#if 0
    -      /* XXX: Although valid IR, no LLVM target currently support this */
    +#if HAVE_LLVM >= 0x0207
           cond = LLVMBuildFCmp(builder, op, a, b, "");
    -      res = LLVMBuildSelect(builder, cond, ones, zeros, "");
    +      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
     #else
    -      res = LLVMGetUndef(int_vec_type);
           if (type.length == 1) {
    -         res = LLVMBuildFCmp(builder, op, a, b, "");
    +         cond = LLVMBuildFCmp(builder, op, a, b, "");
    +         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
           }
           else {
    +         unsigned i;
    +
    +         res = LLVMGetUndef(int_vec_type);
    +
              debug_printf("%s: warning: using slow element-wise float"
                           " vector comparison\n", __FUNCTION__);
              for (i = 0; i < type.length; ++i) {
    @@ -286,16 +311,19 @@ lp_build_compare(LLVMBuilderRef builder,
              return lp_build_undef(type);
           }
     
    -#if 0
    -      /* XXX: Although valid IR, no LLVM target currently support this */
    +#if HAVE_LLVM >= 0x0207
           cond = LLVMBuildICmp(builder, op, a, b, "");
    -      res = LLVMBuildSelect(builder, cond, ones, zeros, "");
    +      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
     #else
    -      res = LLVMGetUndef(int_vec_type);
           if (type.length == 1) {
    -         res = LLVMBuildICmp(builder, op, a, b, "");
    +         cond = LLVMBuildICmp(builder, op, a, b, "");
    +         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
           }
           else {
    +         unsigned i;
    +
    +         res = LLVMGetUndef(int_vec_type);
    +
              debug_printf("%s: warning: using slow element-wise int"
                           " vector comparison\n", __FUNCTION__);
     
    @@ -337,6 +365,8 @@ lp_build_cmp(struct lp_build_context *bld,
     
     /**
      * Return mask ? a : b;
    + *
    + * mask is a bitwise mask, composed of 0 or ~0 for each element.
      */
     LLVMValueRef
     lp_build_select(struct lp_build_context *bld,
    @@ -351,6 +381,7 @@ lp_build_select(struct lp_build_context *bld,
           return a;
     
        if (type.length == 1) {
    +      mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), "");
           res = LLVMBuildSelect(bld->builder, mask, a, b, "");
        }
        else {
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
    index a399ebf39ef..b54ec13b701 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h
    @@ -37,7 +37,7 @@
     #define LP_BLD_LOGIC_H
     
     
    -#include   
    +#include "os/os_llvm.h"
     
     #include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */
     
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
    index 4c61d107494..23398f41f99 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
    @@ -256,7 +256,7 @@ lp_build_pack2(LLVMBuilderRef builder,
                    LLVMValueRef lo,
                    LLVMValueRef hi)
     {
    -#if !(HAVE_LLVM >= 0x0207)
    +#if HAVE_LLVM < 0x0207
        LLVMTypeRef src_vec_type = lp_build_vec_type(src_type);
     #endif
        LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type);
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
    index fb2a34984a4..346a17d5803 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
    @@ -37,7 +37,7 @@
     #define LP_BLD_PACK_H
     
     
    -#include   
    +#include "os/os_llvm.h"
     
     
     struct lp_type;
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
    index 311c9f1b9e4..543fd5fea3f 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
    @@ -155,14 +155,16 @@ lp_build_gather(LLVMBuilderRef builder,
     /**
      * Compute the offset of a pixel.
      *
    - * x, y, y_stride are vectors
    + * x, y, z, y_stride, z_stride are vectors
      */
     LLVMValueRef
     lp_build_sample_offset(struct lp_build_context *bld,
                            const struct util_format_description *format_desc,
                            LLVMValueRef x,
                            LLVMValueRef y,
    -                       LLVMValueRef y_stride)
    +                       LLVMValueRef z,
    +                       LLVMValueRef y_stride,
    +                       LLVMValueRef z_stride)
     {
        LLVMValueRef x_stride;
        LLVMValueRef offset;
    @@ -178,6 +180,10 @@ lp_build_sample_offset(struct lp_build_context *bld,
           LLVMValueRef y_offset_lo, y_offset_hi;
           LLVMValueRef offset_lo, offset_hi;
     
    +      /* XXX 1D & 3D addressing not done yet */
    +      assert(!z);
    +      assert(!z_stride);
    +
           x_lo = LLVMBuildAnd(bld->builder, x, bld->one, "");
           y_lo = LLVMBuildAnd(bld->builder, y, bld->one, "");
     
    @@ -201,13 +207,17 @@ lp_build_sample_offset(struct lp_build_context *bld,
           offset = lp_build_add(bld, offset_hi, offset_lo);
        }
        else {
    -      LLVMValueRef x_offset;
    -      LLVMValueRef y_offset;
    +      offset = lp_build_mul(bld, x, x_stride);
     
    -      x_offset = lp_build_mul(bld, x, x_stride);
    -      y_offset = lp_build_mul(bld, y, y_stride);
    +      if (y && y_stride) {
    +         LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride);
    +         offset = lp_build_add(bld, offset, y_offset);
    +      }
     
    -      offset = lp_build_add(bld, x_offset, y_offset);
    +      if (z && z_stride) {
    +         LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride);
    +         offset = lp_build_add(bld, offset, z_offset);
    +      }
        }
     
        return offset;
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
    index 68db91d6fd6..7f08bfaac1f 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
    @@ -36,7 +36,7 @@
     #define LP_BLD_SAMPLE_H
     
     
    -#include 
    +#include "os/os_llvm.h"
     
     struct pipe_texture;
     struct pipe_sampler_state;
    @@ -113,9 +113,9 @@ struct lp_sampler_dynamic_state
                       unsigned unit);
     
        LLVMValueRef
    -   (*stride)( struct lp_sampler_dynamic_state *state,
    -              LLVMBuilderRef builder,
    -              unsigned unit);
    +   (*row_stride)( struct lp_sampler_dynamic_state *state,
    +                  LLVMBuilderRef builder,
    +                  unsigned unit);
     
        LLVMValueRef
        (*data_ptr)( struct lp_sampler_dynamic_state *state,
    @@ -148,7 +148,9 @@ lp_build_sample_offset(struct lp_build_context *bld,
                            const struct util_format_description *format_desc,
                            LLVMValueRef x,
                            LLVMValueRef y,
    -                       LLVMValueRef y_stride);
    +                       LLVMValueRef z,
    +                       LLVMValueRef y_stride,
    +                       LLVMValueRef z_stride);
     
     
     void
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
    index a965d394f44..72018737a85 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
    @@ -48,6 +48,7 @@
     #include "lp_bld_logic.h"
     #include "lp_bld_swizzle.h"
     #include "lp_bld_pack.h"
    +#include "lp_bld_flow.h"
     #include "lp_bld_format.h"
     #include "lp_bld_sample.h"
     
    @@ -139,7 +140,55 @@ lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
     
     
     /**
    - * Gen code to fetch a texel from a texture at int coords (x, y).
    + * Load stride_array[level] (GEP with indexes {0, level}, then a load).
    + * Return it broadcast to a vector through bld->int_coord_bld.
    + */
    +static LLVMValueRef
    +lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
    +                              LLVMValueRef stride_array, LLVMValueRef level)
    +{
    +   LLVMValueRef indexes[2], stride;
    +   indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
    +   indexes[1] = level;
    +   stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, "");
    +   stride = LLVMBuildLoad(bld->builder, stride, "");
    +   stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
    +   return stride;
    +}
    +
    +
    +/** Get stride_array[level] (as a vector) for a constant C integer level. */
    +static LLVMValueRef
    +lp_build_get_const_level_stride_vec(struct lp_build_sample_context *bld,
    +                                    LLVMValueRef stride_array, int level)
    +{
    +   LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
    +   return lp_build_get_level_stride_vec(bld, stride_array, lvl);
    +}
    +
    +/** Return 1, 2 or 3 for the given texture target (2D and CUBE both give 2). */
    +static int
    +texture_dims(enum pipe_texture_target tex)
    +{
    +   switch (tex) {
    +   case PIPE_TEXTURE_1D:
    +      return 1;
    +   case PIPE_TEXTURE_2D:
    +   case PIPE_TEXTURE_CUBE:
    +      return 2;
    +   case PIPE_TEXTURE_3D:
    +      return 3;
    +   default:
    +      assert(0 && "bad texture target in texture_dims()");
    +      return 2;   /* reached only if the assert above does not abort */
    +   }
    +}
    +
    +
    +
    +/**
    + * Generate code to fetch a texel from a texture at int coords (x, y, z).
    + * The computation depends on whether the texture is 1D, 2D or 3D.
      * The result, texel, will be:
      *   texel[0] = red values
      *   texel[1] = green values
    @@ -150,12 +199,16 @@ static void
     lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
                               LLVMValueRef width,
                               LLVMValueRef height,
    +                          LLVMValueRef depth,
                               LLVMValueRef x,
                               LLVMValueRef y,
    +                          LLVMValueRef z,
                               LLVMValueRef y_stride,
    +                          LLVMValueRef z_stride,
                               LLVMValueRef data_ptr,
                               LLVMValueRef *texel)
     {
    +   const int dims = texture_dims(bld->static_state->target);
        struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
        LLVMValueRef offset;
        LLVMValueRef packed;
    @@ -169,7 +222,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
           use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
        }
     
    -   if (wrap_mode_uses_border_color(bld->static_state->wrap_t)) {
    +   if (dims >= 2 && wrap_mode_uses_border_color(bld->static_state->wrap_t)) {
           LLVMValueRef b1, b2;
           b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
           b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
    @@ -182,6 +235,19 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
           }
        }
     
    +   if (dims == 3 && wrap_mode_uses_border_color(bld->static_state->wrap_r)) {
    +      LLVMValueRef b1, b2;
    +      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
    +      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
    +      if (use_border) {
    +         use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
    +         use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
    +      }
    +      else {
    +         use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
    +      }
    +   }
    +
        /*
         * Note: if we find an app which frequently samples the texture border
         * we might want to implement a true conditional here to avoid sampling
    @@ -197,10 +263,10 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
         * the texel color results with the border color.
         */
     
    -   /* convert x,y coords to linear offset from start of texture, in bytes */
    +   /* convert x,y,z coords to linear offset from start of texture, in bytes */
        offset = lp_build_sample_offset(&bld->uint_coord_bld,
                                        bld->format_desc,
    -                                   x, y, y_stride);
    +                                   x, y, z, y_stride, z_stride);
     
        assert(bld->format_desc->block.width == 1);
        assert(bld->format_desc->block.height == 1);
    @@ -213,6 +279,8 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
                                 bld->texel_type.width,
                                 data_ptr, offset);
     
    +   texel[0] = texel[1] = texel[2] = texel[3] = NULL;
    +
        /* convert texels to float rgba */
        lp_build_unpack_rgba_soa(bld->builder,
                                 bld->format_desc,
    @@ -245,7 +313,7 @@ lp_build_sample_packed(struct lp_build_sample_context *bld,
     
        offset = lp_build_sample_offset(&bld->uint_coord_bld,
                                        bld->format_desc,
    -                                   x, y, y_stride);
    +                                   x, y, NULL, y_stride, NULL);
     
        assert(bld->format_desc->block.width == 1);
        assert(bld->format_desc->block.height == 1);
    @@ -756,24 +824,6 @@ lp_build_minify(struct lp_build_sample_context *bld,
     }
     
     
    -static int
    -texture_dims(enum pipe_texture_target tex)
    -{
    -   switch (tex) {
    -   case PIPE_TEXTURE_1D:
    -      return 1;
    -   case PIPE_TEXTURE_2D:
    -   case PIPE_TEXTURE_CUBE:
    -      return 2;
    -   case PIPE_TEXTURE_3D:
    -      return 3;
    -   default:
    -      assert(0 && "bad texture target in texture_dims()");
    -      return 2;
    -   }
    -}
    -
    -
     /**
      * Generate code to compute texture level of detail (lambda).
      * \param s  vector of texcoord s values
    @@ -794,7 +844,6 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
     
     {
        const int dims = texture_dims(bld->static_state->target);
    -   struct lp_build_context *coord_bld = &bld->coord_bld;
        struct lp_build_context *float_bld = &bld->float_bld;
        LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(), bld->static_state->lod_bias);
        LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
    @@ -921,161 +970,635 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                                LLVMValueRef *level1_out,
                                LLVMValueRef *weight_out)
     {
    -   struct lp_build_context *coord_bld = &bld->coord_bld;
    -   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    +   struct lp_build_context *float_bld = &bld->float_bld;
    +   struct lp_build_context *int_bld = &bld->int_bld;
        LLVMValueRef last_level, level;
     
        last_level = bld->dynamic_state->last_level(bld->dynamic_state,
                                                    bld->builder, unit);
     
        /* convert float lod to integer */
    -   level = lp_build_ifloor(coord_bld, lod);
    +   level = lp_build_ifloor(float_bld, lod);
     
        /* compute level 0 and clamp to legal range of levels */
    -   *level0_out = lp_build_clamp(int_coord_bld, level,
    -                                int_coord_bld->zero,
    +   *level0_out = lp_build_clamp(int_bld, level,
    +                                int_bld->zero,
                                     last_level);
        /* compute level 1 and clamp to legal range of levels */
    -   *level1_out = lp_build_add(int_coord_bld, *level0_out, int_coord_bld->one);
    -   *level1_out = lp_build_min(int_coord_bld, *level1_out, int_coord_bld->zero);
    +   *level1_out = lp_build_add(int_bld, *level0_out, int_bld->one);
    +   *level1_out = lp_build_min(int_bld, *level1_out, last_level);
     
    -   *weight_out = lp_build_fract(coord_bld, lod);
    -}
    -
    -
    -
    -/**
    - * Sample 2D texture with nearest filtering, no mipmapping.
    - */
    -static void
    -lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
    -                               LLVMValueRef s,
    -                               LLVMValueRef t,
    -                               LLVMValueRef width,
    -                               LLVMValueRef height,
    -                               LLVMValueRef stride,
    -                               LLVMValueRef data_array,
    -                               LLVMValueRef *texel)
    -{
    -   LLVMValueRef x, y;
    -   LLVMValueRef data_ptr;
    -
    -   x = lp_build_sample_wrap_nearest(bld, s, width,
    -                                    bld->static_state->pot_width,
    -                                    bld->static_state->wrap_s);
    -   y = lp_build_sample_wrap_nearest(bld, t, height,
    -                                    bld->static_state->pot_height,
    -                                    bld->static_state->wrap_t);
    -
    -   lp_build_name(x, "tex.x.wrapped");
    -   lp_build_name(y, "tex.y.wrapped");
    -
    -   /* get pointer to mipmap level 0 data */
    -   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
    -
    -   lp_build_sample_texel_soa(bld, width, height, x, y, stride, data_ptr, texel);
    +   *weight_out = lp_build_fract(float_bld, lod);
     }
     
     
     /**
    - * Sample 2D texture with nearest filtering, nearest mipmap.
    + * Generate code to sample a mipmap level with nearest filtering.
    + * If sampling a cube texture, r = cube face in [0,5].
      */
     static void
    -lp_build_sample_2d_nearest_mip_nearest_soa(struct lp_build_sample_context *bld,
    -                                           unsigned unit,
    -                                           LLVMValueRef s,
    -                                           LLVMValueRef t,
    -                                           LLVMValueRef width,
    -                                           LLVMValueRef height,
    -                                           LLVMValueRef width_vec,
    -                                           LLVMValueRef height_vec,
    -                                           LLVMValueRef stride,
    -                                           LLVMValueRef data_array,
    -                                           LLVMValueRef *texel)
    +lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
    +                              LLVMValueRef width_vec,
    +                              LLVMValueRef height_vec,
    +                              LLVMValueRef depth_vec,
    +                              LLVMValueRef row_stride_vec,
    +                              LLVMValueRef img_stride_vec,
    +                              LLVMValueRef data_ptr,
    +                              LLVMValueRef s,
    +                              LLVMValueRef t,
    +                              LLVMValueRef r,
    +                              LLVMValueRef colors_out[4])
     {
    -   LLVMValueRef x, y;
    -   LLVMValueRef lod, ilevel, ilevel_vec;
    -   LLVMValueRef data_ptr;
    -
    -   /* compute float LOD */
    -   lod = lp_build_lod_selector(bld, s, t, NULL, width, height, NULL);
    -
    -   /* convert LOD to int */
    -   lp_build_nearest_mip_level(bld, unit, lod, &ilevel);
    -
    -   ilevel_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel);
    -
    -   /* compute width_vec, height at mipmap level 'ilevel' */
    -   width_vec = lp_build_minify(bld, width_vec, ilevel_vec);
    -   height_vec = lp_build_minify(bld, height_vec, ilevel_vec);
    -   stride = lp_build_minify(bld, stride, ilevel_vec);
    +   const int dims = texture_dims(bld->static_state->target);
    +   LLVMValueRef x, y, z;
     
    +   /*
    +    * Compute integer texcoords.
    +    */
        x = lp_build_sample_wrap_nearest(bld, s, width_vec,
                                         bld->static_state->pot_width,
                                         bld->static_state->wrap_s);
    -   y = lp_build_sample_wrap_nearest(bld, t, height_vec,
    -                                    bld->static_state->pot_height,
    -                                    bld->static_state->wrap_t);
    -
        lp_build_name(x, "tex.x.wrapped");
    -   lp_build_name(y, "tex.y.wrapped");
     
    -   /* get pointer to mipmap level [ilevel] data */
    -   if (0)
    -      data_ptr = lp_build_get_mipmap_level(bld, data_array, ilevel);
    -   else
    -      data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
    +   if (dims >= 2) {
    +      y = lp_build_sample_wrap_nearest(bld, t, height_vec,
    +                                       bld->static_state->pot_height,
    +                                       bld->static_state->wrap_t);
    +      lp_build_name(y, "tex.y.wrapped");
     
    -   lp_build_sample_texel_soa(bld, width_vec, height_vec, x, y, stride, data_ptr, texel);
    +      if (dims == 3) {
    +         z = lp_build_sample_wrap_nearest(bld, r, depth_vec,
    +                                          bld->static_state->pot_depth,
    +                                          bld->static_state->wrap_r);
    +         lp_build_name(z, "tex.z.wrapped");
    +      }
    +      else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
    +         z = r;
    +      }
    +      else {
    +         z = NULL;
    +      }
    +   }
    +   else {
    +      y = z = NULL;
    +   }
    +
    +   /*
    +    * Get texture colors.
    +    */
    +   lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
    +                             x, y, z,
    +                             row_stride_vec, img_stride_vec,
    +                             data_ptr, colors_out);
     }
     
     
     /**
    - * Sample 2D texture with bilinear filtering.
    + * Generate code to sample a mipmap level with linear filtering.
    + * If sampling a cube texture, r = cube face in [0,5] (used as z, not filtered).
      */
     static void
    -lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
    -                              LLVMValueRef s,
    -                              LLVMValueRef t,
    -                              LLVMValueRef width,
    -                              LLVMValueRef height,
    -                              LLVMValueRef stride,
    -                              LLVMValueRef data_array,
    -                              LLVMValueRef *texel)
    +lp_build_sample_image_linear(struct lp_build_sample_context *bld,
    +                             LLVMValueRef width_vec,
    +                             LLVMValueRef height_vec,
    +                             LLVMValueRef depth_vec,
    +                             LLVMValueRef row_stride_vec,
    +                             LLVMValueRef img_stride_vec,
    +                             LLVMValueRef data_ptr,
    +                             LLVMValueRef s,
    +                             LLVMValueRef t,
    +                             LLVMValueRef r,
    +                             LLVMValueRef colors_out[4])
     {
    -   LLVMValueRef s_fpart;
    -   LLVMValueRef t_fpart;
    -   LLVMValueRef x0, x1;
    -   LLVMValueRef y0, y1;
    +   const int dims = texture_dims(bld->static_state->target);
    +   LLVMValueRef x0, y0, z0, x1, y1, z1;
    +   LLVMValueRef s_fpart, t_fpart, r_fpart;
        LLVMValueRef neighbors[2][2][4];
    -   LLVMValueRef data_ptr;
    -   unsigned chan;
    +   int chan;
     
    -   lp_build_sample_wrap_linear(bld, s, width, bld->static_state->pot_width,
    -                               bld->static_state->wrap_s, &x0, &x1, &s_fpart);
    -   lp_build_sample_wrap_linear(bld, t, height, bld->static_state->pot_height,
    -                               bld->static_state->wrap_t, &y0, &y1, &t_fpart);
    +   /*
    +    * Compute integer texcoords (two per axis) plus the fractional lerp weights.
    +    */
    +   lp_build_sample_wrap_linear(bld, s, width_vec,
    +                               bld->static_state->pot_width,
    +                               bld->static_state->wrap_s,
    +                               &x0, &x1, &s_fpart);
    +   lp_build_name(x0, "tex.x0.wrapped");
    +   lp_build_name(x1, "tex.x1.wrapped");
     
    -   /* get pointer to mipmap level 0 data */
    -   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
    +   if (dims >= 2) {
    +      lp_build_sample_wrap_linear(bld, t, height_vec,
    +                                  bld->static_state->pot_height,
    +                                  bld->static_state->wrap_t,
    +                                  &y0, &y1, &t_fpart);
    +      lp_build_name(y0, "tex.y0.wrapped");
    +      lp_build_name(y1, "tex.y1.wrapped");
     
    -   lp_build_sample_texel_soa(bld, width, height, x0, y0, stride, data_ptr, neighbors[0][0]);
    -   lp_build_sample_texel_soa(bld, width, height, x1, y0, stride, data_ptr, neighbors[0][1]);
    -   lp_build_sample_texel_soa(bld, width, height, x0, y1, stride, data_ptr, neighbors[1][0]);
    -   lp_build_sample_texel_soa(bld, width, height, x1, y1, stride, data_ptr, neighbors[1][1]);
    +      if (dims == 3) {
    +         lp_build_sample_wrap_linear(bld, r, depth_vec,
    +                                     bld->static_state->pot_depth,
    +                                     bld->static_state->wrap_r,
    +                                     &z0, &z1, &r_fpart);
    +         lp_build_name(z0, "tex.z0.wrapped");
    +         lp_build_name(z1, "tex.z1.wrapped");
    +      }
    +      else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
    +         z0 = z1 = r;  /* cube face */
    +         r_fpart = NULL;
    +      }
    +      else {
    +         z0 = z1 = NULL;
    +         r_fpart = NULL;
    +      }
    +   }
    +   else {
    +      y0 = y1 = t_fpart = NULL;
    +      z0 = z1 = r_fpart = NULL;
    +   }
     
    -   /* TODO: Don't interpolate missing channels */
    -   for(chan = 0; chan < 4; ++chan) {
    -      texel[chan] = lp_build_lerp_2d(&bld->texel_bld,
    -                                     s_fpart, t_fpart,
    -                                     neighbors[0][0][chan],
    -                                     neighbors[0][1][chan],
    -                                     neighbors[1][0][chan],
    -                                     neighbors[1][1][chan]);
    +   /*
    +    * Get texture colors.
    +    */
    +   /* get x0/x1 texels at y0, z0 (the only two fetches for a 1D texture) */
    +   lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
    +                             x0, y0, z0,
    +                             row_stride_vec, img_stride_vec,
    +                             data_ptr, neighbors[0][0]);
    +   lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
    +                             x1, y0, z0,
    +                             row_stride_vec, img_stride_vec,
    +                             data_ptr, neighbors[0][1]);
    +
    +   if (dims == 1) {
    +      /* Interpolate two samples from 1D image to produce one color */
    +      for (chan = 0; chan < 4; chan++) {
    +         colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
    +                                          neighbors[0][0][chan],
    +                                          neighbors[0][1][chan]);
    +      }
    +   }
    +   else {
    +      /* 2D/3D texture */
    +      LLVMValueRef colors0[4];
    +
    +      /* get x0/x1 texels at y1 */
    +      lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
    +                                x0, y1, z0,
    +                                row_stride_vec, img_stride_vec,
    +                                data_ptr, neighbors[1][0]);
    +      lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
    +                                x1, y1, z0,
    +                                row_stride_vec, img_stride_vec,
    +                                data_ptr, neighbors[1][1]);
    +
    +      /* Bilinear interpolate the four samples from the 2D image / 3D slice */
    +      for (chan = 0; chan < 4; chan++) {
    +         colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
    +                                          s_fpart, t_fpart,
    +                                          neighbors[0][0][chan],
    +                                          neighbors[0][1][chan],
    +                                          neighbors[1][0][chan],
    +                                          neighbors[1][1][chan]);
    +      }
    +
    +      if (dims == 3) {
    +         LLVMValueRef neighbors1[2][2][4];
    +         LLVMValueRef colors1[4];
    +
    +         /* get x0/x1/y0/y1 texels at z1 */
    +         lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
    +                                   x0, y0, z1,
    +                                   row_stride_vec, img_stride_vec,
    +                                   data_ptr, neighbors1[0][0]);
    +         lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
    +                                   x1, y0, z1,
    +                                   row_stride_vec, img_stride_vec,
    +                                   data_ptr, neighbors1[0][1]);
    +         lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
    +                                   x0, y1, z1,
    +                                   row_stride_vec, img_stride_vec,
    +                                   data_ptr, neighbors1[1][0]);
    +         lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
    +                                   x1, y1, z1,
    +                                   row_stride_vec, img_stride_vec,
    +                                   data_ptr, neighbors1[1][1]);
    +
    +         /* Bilinear interpolate the four samples from the second Z slice */
    +         for (chan = 0; chan < 4; chan++) {
    +            colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
    +                                             s_fpart, t_fpart,
    +                                             neighbors1[0][0][chan],
    +                                             neighbors1[0][1][chan],
    +                                             neighbors1[1][0][chan],
    +                                             neighbors1[1][1][chan]);
    +         }
    +
    +         /* Linearly interpolate the two samples from the two 3D slices */
    +         for (chan = 0; chan < 4; chan++) {
    +            colors_out[chan] = lp_build_lerp(&bld->texel_bld,
    +                                             r_fpart,
    +                                             colors0[chan], colors1[chan]);
    +         }
    +      }
    +      else {
    +         /* 2D tex (also the cube path: z0 is the face, no lerp across z) */
    +         for (chan = 0; chan < 4; chan++) {
    +            colors_out[chan] = colors0[chan];
    +         }
    +      }
        }
     }
     
     
    +/** Helper used by lp_build_cube_lookup(): returns -0.5 / abs(coord). */
    +static LLVMValueRef
    +lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
    +{
    +   /* ima = -0.5 / abs(coord); emitted as -0.5 * rcp(abs(coord)) */
    +   LLVMValueRef negHalf = lp_build_const_scalar(coord_bld->type, -0.5);
    +   LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
    +   LLVMValueRef ima = lp_build_mul(coord_bld, negHalf,
    +                                   lp_build_rcp(coord_bld, absCoord));
    +   return ima;
    +}
    +
    +
    +/**
    + * Helper used by lp_build_cube_lookup(): negate(coord) * ima * sign + 0.5.
    + * \param sign  scalar +1 or -1, or NULL to skip the sign multiply
    + * \param coord  float vector; negated first when negate_coord == -1
    + * \param ima  float vector
    + */
    +static LLVMValueRef
    +lp_build_cube_coord(struct lp_build_context *coord_bld,
    +                    LLVMValueRef sign, int negate_coord,
    +                    LLVMValueRef coord, LLVMValueRef ima)
    +{
    +   /* return negate(coord) * ima * sign + 0.5; */
    +   LLVMValueRef half = lp_build_const_scalar(coord_bld->type, 0.5);
    +   LLVMValueRef res;
    +
    +   assert(negate_coord == +1 || negate_coord == -1);
    +
    +   if (negate_coord == -1) {
    +      coord = lp_build_negate(coord_bld, coord);
    +   }
    +
    +   res = lp_build_mul(coord_bld, coord, ima);
    +   if (sign) {
    +      sign = lp_build_broadcast_scalar(coord_bld, sign);
    +      res = lp_build_mul(coord_bld, res, sign);
    +   }
    +   res = lp_build_add(coord_bld, res, half);
    +
    +   return res;
    +}
    +
    +
    +/** Helper used by lp_build_cube_lookup()
    + * Return (major_coord >= 0) ? pos_face : neg_face; uses LLVMRealUGE (unordered-or-GE).
    + */
    +static LLVMValueRef
    +lp_build_cube_face(struct lp_build_sample_context *bld,
    +                   LLVMValueRef major_coord,
    +                   unsigned pos_face, unsigned neg_face)
    +{
    +   LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
    +                                    major_coord,
    +                                    bld->float_bld.zero, "");
    +   LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0);
    +   LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0);
    +   LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, "");
    +   return res;
    +}
    +
    +
    +
    +/**
    + * Generate code to do cube face selection and per-face texcoords.
    + */
    +static void
    +lp_build_cube_lookup(struct lp_build_sample_context *bld,
    +                     LLVMValueRef s,
    +                     LLVMValueRef t,
    +                     LLVMValueRef r,
    +                     LLVMValueRef *face,
    +                     LLVMValueRef *face_s,
    +                     LLVMValueRef *face_t)
    +{
    +   struct lp_build_context *float_bld = &bld->float_bld;
    +   struct lp_build_context *coord_bld = &bld->coord_bld;
    +   LLVMValueRef rx, ry, rz;
    +   LLVMValueRef arx, ary, arz;
    +   LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25);
    +   LLVMValueRef arx_ge_ary, arx_ge_arz;
    +   LLVMValueRef ary_ge_arx, ary_ge_arz;
    +   LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;
    +   LLVMValueRef rx_pos, ry_pos, rz_pos;
    +
    +   assert(bld->coord_bld.type.length == 4);
    +
    +   /*
    +    * Use the average of the four pixels' texcoords (sum * 0.25) to choose the face.
    +    */
    +   rx = lp_build_mul(float_bld, c25,
    +                     lp_build_sum_vector(&bld->coord_bld, s));
    +   ry = lp_build_mul(float_bld, c25,
    +                     lp_build_sum_vector(&bld->coord_bld, t));
    +   rz = lp_build_mul(float_bld, c25,
    +                     lp_build_sum_vector(&bld->coord_bld, r));
    +
    +   arx = lp_build_abs(float_bld, rx);
    +   ary = lp_build_abs(float_bld, ry);
    +   arz = lp_build_abs(float_bld, rz);
    +
    +   /*
    +    * Compare sign/magnitude of rx,ry,rz to determine face
    +    */
    +   arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, "");
    +   arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, "");
    +   ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, "");
    +   ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, "");
    +
    +   arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, "");
    +   ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
    +
    +   rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, "");
    +   ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, "");
    +   rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, ""); /* NOTE(review): rx_pos/ry_pos/rz_pos look unused below */
    +
    +   {
    +      struct lp_build_flow_context *flow_ctx;
    +      struct lp_build_if_state if_ctx;
    +
    +      flow_ctx = lp_build_flow_create(bld->builder);
    +      lp_build_flow_scope_begin(flow_ctx);
    +
    +      *face_s = bld->coord_bld.undef;
    +      *face_t = bld->coord_bld.undef;
    +      *face = bld->int_bld.undef;
    +
    +      lp_build_name(*face_s, "face_s");
    +      lp_build_name(*face_t, "face_t");
    +      lp_build_name(*face, "face");
    +
    +      lp_build_flow_scope_declare(flow_ctx, face_s);
    +      lp_build_flow_scope_declare(flow_ctx, face_t);
    +      lp_build_flow_scope_declare(flow_ctx, face);
    +
    +      lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
    +      {
    +         /* +/- X face: |rx| is the largest (or tied) component */
    +         LLVMValueRef sign = lp_build_sgn(float_bld, rx);
    +         LLVMValueRef ima = lp_build_cube_ima(coord_bld, s);
    +         *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
    +         *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
    +         *face = lp_build_cube_face(bld, rx,
    +                                    PIPE_TEX_FACE_POS_X,
    +                                    PIPE_TEX_FACE_NEG_X);
    +      }
    +      lp_build_else(&if_ctx);
    +      {
    +         struct lp_build_flow_context *flow_ctx2;
    +         struct lp_build_if_state if_ctx2;
    +
    +         LLVMValueRef face_s2 = bld->coord_bld.undef;
    +         LLVMValueRef face_t2 = bld->coord_bld.undef;
    +         LLVMValueRef face2 = bld->int_bld.undef;
    +
    +         flow_ctx2 = lp_build_flow_create(bld->builder);
    +         lp_build_flow_scope_begin(flow_ctx2);
    +         lp_build_flow_scope_declare(flow_ctx2, &face_s2);
    +         lp_build_flow_scope_declare(flow_ctx2, &face_t2);
    +         lp_build_flow_scope_declare(flow_ctx2, &face2);
    +
    +         ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); /* same expression as built before the if */
    +
    +         lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
    +         {
    +            /* +/- Y face */
    +            LLVMValueRef sign = lp_build_sgn(float_bld, ry);
    +            LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
    +            face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
    +            face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
    +            face2 = lp_build_cube_face(bld, ry,
    +                                       PIPE_TEX_FACE_POS_Y,
    +                                       PIPE_TEX_FACE_NEG_Y);
    +         }
    +         lp_build_else(&if_ctx2);
    +         {
    +            /* +/- Z face */
    +            LLVMValueRef sign = lp_build_sgn(float_bld, rz);
    +            LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
    +            face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
    +            face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
    +            face2 = lp_build_cube_face(bld, rz,
    +                                       PIPE_TEX_FACE_POS_Z,
    +                                       PIPE_TEX_FACE_NEG_Z);
    +         }
    +         lp_build_endif(&if_ctx2);
    +         lp_build_flow_scope_end(flow_ctx2);
    +         lp_build_flow_destroy(flow_ctx2);
    +
    +         *face_s = face_s2;
    +         *face_t = face_t2;
    +         *face = face2;
    +      }
    +
    +      lp_build_endif(&if_ctx);
    +      lp_build_flow_scope_end(flow_ctx);
    +      lp_build_flow_destroy(flow_ctx);
    +   }
    +}
    +
    +
    +
    +/**
    + * General texture sampling codegen.
    + * This function handles texture sampling for all texture targets (1D,
    + * 2D, 3D, cube); for now min_img_filter is always used, mag_img_filter is ignored.
    + */
    +static void
    +lp_build_sample_general(struct lp_build_sample_context *bld,
    +                        unsigned unit,
    +                        LLVMValueRef s,
    +                        LLVMValueRef t,
    +                        LLVMValueRef r,
    +                        LLVMValueRef width,
    +                        LLVMValueRef height,
    +                        LLVMValueRef depth,
    +                        LLVMValueRef width_vec,
    +                        LLVMValueRef height_vec,
    +                        LLVMValueRef depth_vec,
    +                        LLVMValueRef row_stride_array,
    +                        LLVMValueRef img_stride_vec,
    +                        LLVMValueRef data_array,
    +                        LLVMValueRef *colors_out)
    +{
    +   const unsigned mip_filter = bld->static_state->min_mip_filter;
    +   const unsigned min_filter = bld->static_state->min_img_filter;
    +   const unsigned mag_filter = bld->static_state->mag_img_filter;
    +   const int dims = texture_dims(bld->static_state->target);
    +   LLVMValueRef lod, lod_fpart;
    +   LLVMValueRef ilevel0, ilevel1, ilevel0_vec, ilevel1_vec;
    +   LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
    +   LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
    +   LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
    +   LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
    +   LLVMValueRef data_ptr0, data_ptr1;
    +   int chan;
    +
    +   /*
    +   printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
    +          mip_filter, min_filter, mag_filter);
    +   */
    +
    +   /*
    +    * Compute the level of detail (mipmap level index(es)).
    +    */
    +   if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
    +      /* always use mip level 0 */
    +      ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
    +   }
    +   else {
    +      /* compute float LOD */
    +      lod = lp_build_lod_selector(bld, s, t, r, width, height, depth);
    +
    +      if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
    +         lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
    +      }
    +      else {
    +         assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR);
    +         lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
    +                                    &lod_fpart);
    +         lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart);
    +      }
    +   }
    +
    +   /*
    +    * Convert scalar integer mipmap levels into vectors.
    +    */
    +   ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
    +   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
    +      ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
    +
    +   /*
    +    * Compute width, height, depth and row/image strides at mipmap level 'ilevel0'
    +    */
    +   width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
    +   if (dims >= 2) {
    +      height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
    +      row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
    +                                                      ilevel0);
    +      if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
    +         img_stride0_vec = lp_build_mul(&bld->int_coord_bld,
    +                                        row_stride0_vec, height0_vec);
    +         if (dims == 3) {
    +            depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
    +         }
    +      }
    +   }
    +   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
    +      /* compute width, height, depth for second mipmap level at ilevel1 */
    +      width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
    +      if (dims >= 2) {
    +         height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
    +         row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
    +                                                         ilevel1);
    +         if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
    +            img_stride1_vec = lp_build_mul(&bld->int_coord_bld,
    +                                           row_stride1_vec, height1_vec);
    +            if (dims ==3) {
    +               depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
    +            }
    +         }
    +      }
    +   }
    +
    +   /*
    +    * Choose cube face, recompute texcoords.
    +    */
    +   if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
    +      LLVMValueRef face, face_s, face_t;
    +      lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
    +      s = face_s; /* vec */
    +      t = face_t; /* vec */
    +      /* use 'r' to indicate cube face */
    +      r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
    +   }
    +
    +   /*
    +    * Get pointer(s) to image data for mipmap level(s).
    +    */
    +   data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
    +   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
    +      data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
    +   }
    +
    +   /*
    +    * Get/interpolate texture colors.
    +    */
    +   /* XXX temporarily force this path: */
    +   if (1 /*min_filter == mag_filter*/) {
    +      /* same filter for minification or magnification */
    +      LLVMValueRef colors0[4], colors1[4];
    +
    +      if (min_filter == PIPE_TEX_FILTER_NEAREST) {
    +         lp_build_sample_image_nearest(bld,
    +                                       width0_vec, height0_vec, depth0_vec,
    +                                       row_stride0_vec, img_stride0_vec,
    +                                       data_ptr0, s, t, r, colors0);
    +
    +         if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
    +            /* sample the second mipmap level, and interp */
    +            lp_build_sample_image_nearest(bld,
    +                                          width1_vec, height1_vec, depth1_vec,
    +                                          row_stride1_vec, img_stride1_vec,
    +                                          data_ptr1, s, t, r, colors1);
    +         }
    +      }
    +      else {
    +         assert(min_filter == PIPE_TEX_FILTER_LINEAR);
    +
    +         lp_build_sample_image_linear(bld,
    +                                      width0_vec, height0_vec, depth0_vec,
    +                                      row_stride0_vec, img_stride0_vec,
    +                                      data_ptr0, s, t, r, colors0);
    +
    +
    +         if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
    +            /* sample the second mipmap level, and interp */
    +            lp_build_sample_image_linear(bld,
    +                                         width1_vec, height1_vec, depth1_vec,
    +                                         row_stride1_vec, img_stride1_vec,
    +                                         data_ptr1, s, t, r, colors1);
    +         }
    +      }
    +
    +      if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
    +         /* interpolate samples from the two mipmap levels */
    +         for (chan = 0; chan < 4; chan++) {
    +            colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
    +                                             colors0[chan], colors1[chan]);
    +         }
    +      }
    +      else {
    +         /* use first/only level's colors */
    +         for (chan = 0; chan < 4; chan++) {
    +            colors_out[chan] = colors0[chan];
    +         }
    +      }
    +   }
    +   else {
    +      /* emit conditional to choose min image filter or mag image filter
    +       * depending on the lod being >0 or <= 0, respectively.
    +       * Not implemented: unreachable while the condition above is forced to 1.
    +       */
    +      abort();
    +   }
    +}
    +
    +
    +
     static void
     lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
                               struct lp_type dst_type,
    @@ -1112,7 +1635,7 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
                                   LLVMValueRef t,
                                   LLVMValueRef width,
                                   LLVMValueRef height,
    -                              LLVMValueRef stride,
    +                              LLVMValueRef stride_array,
                                   LLVMValueRef data_array,
                                   LLVMValueRef *texel)
     {
    @@ -1129,6 +1652,7 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
        LLVMValueRef neighbors_hi[2][2];
        LLVMValueRef packed, packed_lo, packed_hi;
        LLVMValueRef unswizzled[4];
    +   LLVMValueRef stride;
     
        lp_build_context_init(&i32, builder, lp_type_int_vec(32));
        lp_build_context_init(&h16, builder, lp_type_ufixed(16));
    @@ -1236,6 +1760,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
           t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
        }
     
    +   stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0);
    +
        /*
         * Fetch the pixels as 4 x 32bit (rgba order might differ):
         *
    @@ -1359,12 +1885,12 @@ lp_build_sample_soa(LLVMBuilderRef builder,
        struct lp_build_sample_context bld;
        LLVMValueRef width, width_vec;
        LLVMValueRef height, height_vec;
    -   LLVMValueRef stride, stride_vec;
    +   LLVMValueRef depth, depth_vec;
    +   LLVMValueRef stride_array;
        LLVMValueRef data_array;
        LLVMValueRef s;
        LLVMValueRef t;
        LLVMValueRef r;
    -   boolean done = FALSE;
     
        (void) lp_build_lod_selector;   /* temporary to silence warning */
        (void) lp_build_nearest_mip_level;
    @@ -1395,7 +1921,8 @@ lp_build_sample_soa(LLVMBuilderRef builder,
        /* Get the dynamic state */
        width = dynamic_state->width(dynamic_state, builder, unit);
        height = dynamic_state->height(dynamic_state, builder, unit);
    -   stride = dynamic_state->stride(dynamic_state, builder, unit);
    +   depth = dynamic_state->depth(dynamic_state, builder, unit);
    +   stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
        data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
        /* Note that data_array is an array[level] of pointers to texture images */
     
    @@ -1405,58 +1932,26 @@ lp_build_sample_soa(LLVMBuilderRef builder,
     
        width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
        height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
    -   stride_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, stride);
    +   depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
     
    -   if(static_state->target == PIPE_TEXTURE_1D)
    -      t = bld.coord_bld.zero;
    -
    -   switch (static_state->min_mip_filter) {
    -   case PIPE_TEX_MIPFILTER_NONE:
    -      break;
    -   case PIPE_TEX_MIPFILTER_NEAREST:
    -
    -      switch (static_state->min_img_filter) {
    -      case PIPE_TEX_FILTER_NEAREST:
    -         lp_build_sample_2d_nearest_mip_nearest_soa(&bld, unit,
    -                                                    s, t,
    -                                                    width, height,
    -                                                    width_vec, height_vec,
    -                                                    stride_vec,
    -                                                    data_array, texel);
    -         done = TRUE;
    -         break;
    -      }
    -
    -      break;
    -   case PIPE_TEX_MIPFILTER_LINEAR:
    -      break;
    -   default:
    -      assert(0 && "invalid mip filter");
    +   if (lp_format_is_rgba8(bld.format_desc) &&
    +       static_state->target == PIPE_TEXTURE_2D &&
    +       static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
    +       static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
    +       static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
    +       is_simple_wrap_mode(static_state->wrap_s) &&
    +       is_simple_wrap_mode(static_state->wrap_t)) {
    +      /* special case */
    +      lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
    +                                    stride_array, data_array, texel);
        }
    -
    -   if (!done) {
    -      switch (static_state->min_img_filter) {
    -      case PIPE_TEX_FILTER_NEAREST:
    -         lp_build_sample_2d_nearest_soa(&bld, s, t, width_vec, height_vec,
    -                                        stride_vec, data_array, texel);
    -         break;
    -      case PIPE_TEX_FILTER_LINEAR:
    -         if(lp_format_is_rgba8(bld.format_desc) &&
    -            is_simple_wrap_mode(static_state->wrap_s) &&
    -            is_simple_wrap_mode(static_state->wrap_t))
    -            lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
    -                                          stride_vec, data_array, texel);
    -         else
    -            lp_build_sample_2d_linear_soa(&bld, s, t, width_vec, height_vec,
    -                                          stride_vec, data_array, texel);
    -         break;
    -      default:
    -         assert(0);
    -      }
    +   else {
    +      lp_build_sample_general(&bld, unit, s, t, r,
    +                              width, height, depth,
    +                              width_vec, height_vec, depth_vec,
    +                              stride_array, NULL, data_array,
    +                              texel);
        }
     
    -   /* FIXME: respect static_state->min_mip_filter */;
    -   /* FIXME: respect static_state->mag_img_filter */;
    -
        lp_build_sample_compare(&bld, r, texel);
     }
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.h b/src/gallium/auxiliary/gallivm/lp_bld_struct.h
    index 740392f5611..34478c10f51 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_struct.h
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.h
    @@ -37,7 +37,7 @@
     #define LP_BLD_STRUCT_H
     
     
    -#include <llvm-c/Core.h>  
    +#include "os/os_llvm.h"
     #include <llvm-c/Target.h>
     
     #include "util/u_debug.h"
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
    index b9472127a63..57b5cc079f2 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
    @@ -37,7 +37,7 @@
     #define LP_BLD_SWIZZLE_H
     
     
    -#include <llvm-c/Core.h>  
    +#include "os/os_llvm.h"
     
     
     struct lp_type;
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
    index eddb7a83fa2..0f2f8a65b10 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
    @@ -35,7 +35,7 @@
     #ifndef LP_BLD_TGSI_H
     #define LP_BLD_TGSI_H
     
    -#include <llvm-c/Core.h>
    +#include "os/os_llvm.h"
     
     
     struct tgsi_token;
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
    index fbb664d43a0..5ec59d636cb 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
    @@ -41,6 +41,7 @@
     #include "util/u_debug.h"
     #include "util/u_math.h"
     #include "util/u_memory.h"
    +#include "tgsi/tgsi_dump.h"
     #include "tgsi/tgsi_info.h"
     #include "tgsi/tgsi_parse.h"
     #include "tgsi/tgsi_util.h"
    @@ -95,6 +96,19 @@ struct lp_exec_mask {
        int cond_stack_size;
        LLVMValueRef cond_mask;
     
    +   LLVMValueRef break_stack[LP_TGSI_MAX_NESTING];
    +   int break_stack_size;
    +   LLVMValueRef break_mask;
    +
    +   LLVMValueRef cont_stack[LP_TGSI_MAX_NESTING];
    +   int cont_stack_size;
    +   LLVMValueRef cont_mask;
    +
    +   LLVMBasicBlockRef loop_stack[LP_TGSI_MAX_NESTING];
    +   int loop_stack_size;
    +   LLVMBasicBlockRef loop_block;
    +
    +
        LLVMValueRef exec_mask;
     };
     
    @@ -145,14 +159,33 @@ static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context
        mask->bld = bld;
        mask->has_mask = FALSE;
        mask->cond_stack_size = 0;
    +   mask->loop_stack_size = 0;
    +   mask->break_stack_size = 0;
    +   mask->cont_stack_size = 0;
     
        mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
     }
     
     static void lp_exec_mask_update(struct lp_exec_mask *mask)
     {
    -   mask->exec_mask = mask->cond_mask;
    -   mask->has_mask = (mask->cond_stack_size > 0);
    +   if (mask->loop_stack_size) {
    +      /* Inside a loop the full mask has to be rebuilt at runtime:
    +       * exec_mask = cond_mask & cont_mask & break_mask */
    +      LLVMValueRef tmp;
    +      tmp = LLVMBuildAnd(mask->bld->builder,
    +                         mask->cont_mask,
    +                         mask->break_mask,
    +                         "maskcb");
    +      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
    +                                     mask->cond_mask,
    +                                     tmp,
    +                                     "maskfull");
    +   } else
    +      mask->exec_mask = mask->cond_mask;
    +
    +
    +   mask->has_mask = (mask->cond_stack_size > 0 ||
    +                     mask->loop_stack_size > 0);
     }
     
     static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
    @@ -189,6 +222,89 @@ static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
        lp_exec_mask_update(mask);
     }
     
    +static void lp_exec_bgnloop(struct lp_exec_mask *mask)
    +{
    +   /* A mask with nothing saved on its stack starts out as all-ones. */
    +   if (mask->cont_stack_size == 0)
    +      mask->cont_mask = LLVMConstAllOnes(mask->int_vec_type);
    +   if (mask->break_stack_size == 0)
    +      mask->break_mask = LLVMConstAllOnes(mask->int_vec_type);
    +   if (mask->cond_stack_size == 0)
    +      mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type);
    +   mask->loop_stack[mask->loop_stack_size++] = mask->loop_block;
    +   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
    +   LLVMBuildBr(mask->bld->builder, mask->loop_block);
    +   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);
    +
    +   lp_exec_mask_update(mask);
    +}
    +
    +static void lp_exec_break(struct lp_exec_mask *mask)
    +{
    +   LLVMBuilderRef builder = mask->bld->builder;
    +   /* complement of the current exec mask: lanes not executing this BRK */
    +   LLVMValueRef survivors = LLVMBuildNot(builder, mask->exec_mask, "break");
    +
    +   mask->break_stack[mask->break_stack_size] = mask->break_mask;
    +   mask->break_stack_size++;
    +   if (mask->break_stack_size <= 1)
    +      mask->break_mask = survivors;
    +   else
    +      mask->break_mask = LLVMBuildAnd(builder, mask->break_mask,
    +                                      survivors, "break_full");
    +
    +   lp_exec_mask_update(mask);
    +}
    +
    +static void lp_exec_continue(struct lp_exec_mask *mask)
    +{
    +   LLVMBuilderRef builder = mask->bld->builder;
    +   /* complement of the current exec mask: lanes not executing this CONT */
    +   LLVMValueRef survivors = LLVMBuildNot(builder, mask->exec_mask, "");
    +
    +   mask->cont_stack[mask->cont_stack_size] = mask->cont_mask;
    +   mask->cont_stack_size++;
    +   if (mask->cont_stack_size <= 1)
    +      mask->cont_mask = survivors;
    +   else
    +      mask->cont_mask = LLVMBuildAnd(builder, mask->cont_mask,
    +                                     survivors, "");
    +
    +   lp_exec_mask_update(mask);
    +}
    +
    +
    +static void lp_exec_endloop(struct lp_exec_mask *mask)
    +{
    +   LLVMBasicBlockRef endloop;
    +   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
    +                                      mask->bld->type.length);
    +   /* i1cond = (break_mask != 0): branch back while any mask bit is set */
    +   LLVMValueRef i1cond = LLVMBuildICmp(
    +      mask->bld->builder,
    +      LLVMIntNE,
    +      LLVMBuildBitCast(mask->bld->builder, mask->break_mask, reg_type, ""),
    +      LLVMConstNull(reg_type), "");
    +
    +   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");
    +
    +   LLVMBuildCondBr(mask->bld->builder,
    +                   i1cond, mask->loop_block, endloop);
    +
    +   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);
    +
    +   mask->loop_block = mask->loop_stack[--mask->loop_stack_size];
    +   /* pop the continue and break masks, each from its own stack */
    +   if (mask->cont_stack_size) {
    +      mask->cont_mask = mask->cont_stack[--mask->cont_stack_size];
    +   }
    +   if (mask->break_stack_size) {
    +      mask->break_mask = mask->break_stack[--mask->break_stack_size];
    +   }
    +
    +   lp_exec_mask_update(mask);
    +}
    +
     static void lp_exec_mask_store(struct lp_exec_mask *mask,
                                    LLVMValueRef val,
                                    LLVMValueRef dst)
    @@ -1363,14 +1479,15 @@ emit_instruction(
        case TGSI_OPCODE_TXP:
           emit_tex( bld, inst, FALSE, TRUE, dst0 );
           break;
    -      
    +
        case TGSI_OPCODE_BRK:
    -      /* FIXME */
    -      return 0;
    +      lp_exec_break(&bld->exec_mask);
           break;
     
        case TGSI_OPCODE_IF:
           tmp0 = emit_fetch(bld, inst, 0, CHAN_X);
    +      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL,
    +                          tmp0, bld->base.zero);
           lp_exec_mask_cond_push(&bld->exec_mask, tmp0);
           break;
     
    @@ -1380,6 +1497,10 @@ emit_instruction(
           return 0;
           break;
     
    +   case TGSI_OPCODE_BGNLOOP:
    +      lp_exec_bgnloop(&bld->exec_mask);
    +      break;
    +
        case TGSI_OPCODE_REP:
           /* deprecated */
           assert(0);
    @@ -1400,6 +1521,10 @@ emit_instruction(
           return 0;
           break;
     
    +   case TGSI_OPCODE_ENDLOOP:
    +      lp_exec_endloop(&bld->exec_mask);
    +      break;
    +
        case TGSI_OPCODE_ENDREP:
           /* deprecated */
           assert(0);
    @@ -1499,8 +1624,7 @@ emit_instruction(
           break;
     
        case TGSI_OPCODE_CONT:
    -      /* FIXME */
    -      return 0;
    +      lp_exec_continue(&bld->exec_mask);
           break;
     
        case TGSI_OPCODE_EMIT:
    @@ -1603,7 +1727,14 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
              assert( 0 );
           }
        }
    -
    +   if (0) {
    +      LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
    +      LLVMValueRef function = LLVMGetBasicBlockParent(block);
    +      debug_printf("11111111111111111111111111111 \n");
    +      tgsi_dump(tokens, 0);
    +      LLVMDumpValue(function);
    +      debug_printf("2222222222222222222222222222 \n");
    +   }
        tgsi_parse_free( &parse );
     }
     
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.c b/src/gallium/auxiliary/gallivm/lp_bld_type.c
    index c327ba045a6..796af88caad 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_type.c
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.c
    @@ -58,7 +58,10 @@ LLVMTypeRef
     lp_build_vec_type(struct lp_type type)
     {
        LLVMTypeRef elem_type = lp_build_elem_type(type);
    -   return LLVMVectorType(elem_type, type.length);
    +   if (type.length == 1)
    +      return elem_type;
    +   else
    +      return LLVMVectorType(elem_type, type.length);
     }
     
     
    @@ -115,6 +118,9 @@ lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type)
        if(!vec_type)
           return FALSE;
     
    +   if (type.length == 1)
    +      return lp_check_elem_type(type, vec_type);
    +
        if(LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind)
           return FALSE;
     
    @@ -153,7 +159,10 @@ LLVMTypeRef
     lp_build_int_vec_type(struct lp_type type)
     {
        LLVMTypeRef elem_type = lp_build_int_elem_type(type);
    -   return LLVMVectorType(elem_type, type.length);
    +   if (type.length == 1)
    +      return elem_type;
    +   else
    +      return LLVMVectorType(elem_type, type.length);
     }
     
     
    diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h
    index 4daa904e633..5b351476ac2 100644
    --- a/src/gallium/auxiliary/gallivm/lp_bld_type.h
    +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h
    @@ -37,7 +37,7 @@
     #define LP_BLD_TYPE_H
     
     
    -#include <llvm-c/Core.h>  
    +#include "os/os_llvm.h"
     
     #include <pipe/p_compiler.h>
     
    diff --git a/src/gallium/auxiliary/os/os_llvm.h b/src/gallium/auxiliary/os/os_llvm.h
    new file mode 100644
    index 00000000000..d5edfbfe923
    --- /dev/null
    +++ b/src/gallium/auxiliary/os/os_llvm.h
    @@ -0,0 +1,47 @@
    +/**************************************************************************
    + *
    + * Copyright 2010 VMware, Inc.
    + * All Rights Reserved.
    + *
    + * Permission is hereby granted, free of charge, to any person obtaining a
    + * copy of this software and associated documentation files (the
    + * "Software"), to deal in the Software without restriction, including
    + * without limitation the rights to use, copy, modify, merge, publish,
    + * distribute, sub license, and/or sell copies of the Software, and to
    + * permit persons to whom the Software is furnished to do so, subject to
    + * the following conditions:
    + *
    + * The above copyright notice and this permission notice (including the
    + * next paragraph) shall be included in all copies or substantial portions
    + * of the Software.
    + *
    + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
    + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
    + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    + *
    + **************************************************************************/
    +
    +/**
    + * @file
    + * Wrapper for LLVM header file #includes.
    + */
    +
    +
    +#ifndef OS_LLVM_H
    +#define OS_LLVM_H
    +
    +/* Core LLVM C API; the gallivm headers include this wrapper instead. */
    +#include <llvm-c/Core.h>
    +
    +
    +/** Set version to 0 if missing to avoid #ifdef HAVE_LLVM everywhere */
    +#ifndef HAVE_LLVM
    +#define HAVE_LLVM 0x0
    +#endif
    +
    +
    +#endif /* OS_LLVM_H */
    diff --git a/src/gallium/auxiliary/os/os_time.h b/src/gallium/auxiliary/os/os_time.h
    index 5b55c1b3747..7e0f67a76b0 100644
    --- a/src/gallium/auxiliary/os/os_time.h
    +++ b/src/gallium/auxiliary/os/os_time.h
    @@ -71,7 +71,7 @@ os_time_sleep(int64_t usecs);
     /*
      * Helper function for detecting time outs, taking in account overflow.
      *
    - * Returns true the the current time has elapsed beyond the specified interval.
    + * Returns true if the current time has elapsed beyond the specified interval.
      */
     static INLINE boolean
     os_time_timeout(int64_t start,
    diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
    index c9ec2b32bfe..c3ec9ae3f4b 100644
    --- a/src/gallium/auxiliary/translate/translate_generic.c
    +++ b/src/gallium/auxiliary/translate/translate_generic.c
    @@ -393,10 +393,10 @@ static fetch_func get_fetch_func( enum pipe_format format )
           return &fetch_R8G8B8A8_SSCALED;
     
        case PIPE_FORMAT_B8G8R8A8_UNORM:
    -      return &fetch_A8R8G8B8_UNORM;
    +      return &fetch_B8G8R8A8_UNORM;
     
        case PIPE_FORMAT_A8R8G8B8_UNORM:
    -      return &fetch_B8G8R8A8_UNORM;
    +      return &fetch_A8R8G8B8_UNORM;
     
        case PIPE_FORMAT_R32_FIXED:
           return &fetch_R32_FIXED;
    @@ -552,10 +552,10 @@ static emit_func get_emit_func( enum pipe_format format )
           return &emit_R8G8B8A8_SSCALED;
     
        case PIPE_FORMAT_B8G8R8A8_UNORM:
    -      return &emit_A8R8G8B8_UNORM;
    +      return &emit_B8G8R8A8_UNORM;
     
        case PIPE_FORMAT_A8R8G8B8_UNORM:
    -      return &emit_B8G8R8A8_UNORM;
    +      return &emit_A8R8G8B8_UNORM;
     
        default:
           assert(0); 
    diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
    index 03e093c11ea..c13e7427387 100644
    --- a/src/gallium/auxiliary/translate/translate_sse.c
    +++ b/src/gallium/auxiliary/translate/translate_sse.c
    @@ -336,7 +336,7 @@ static boolean translate_attr( struct translate_sse *p,
        case PIPE_FORMAT_R32G32B32A32_FLOAT:
           emit_load_R32G32B32A32(p, dataXMM, srcECX);
           break;
    -   case PIPE_FORMAT_A8R8G8B8_UNORM:
    +   case PIPE_FORMAT_B8G8R8A8_UNORM:
           emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
           emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
           break;
    @@ -360,7 +360,7 @@ static boolean translate_attr( struct translate_sse *p,
        case PIPE_FORMAT_R32G32B32A32_FLOAT:
           emit_store_R32G32B32A32(p, dstEAX, dataXMM);
           break;
    -   case PIPE_FORMAT_A8R8G8B8_UNORM:
    +   case PIPE_FORMAT_B8G8R8A8_UNORM:
           emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
           emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
           break;
    diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
    index 71cf9525d39..1692987e8e3 100644
    --- a/src/gallium/auxiliary/util/u_blitter.c
    +++ b/src/gallium/auxiliary/util/u_blitter.c
    @@ -109,9 +109,9 @@ struct blitter_context_priv
     struct blitter_context *util_blitter_create(struct pipe_context *pipe)
     {
        struct blitter_context_priv *ctx;
    -   struct pipe_blend_state blend = { 0 };
    -   struct pipe_depth_stencil_alpha_state dsa = { { 0 } };
    -   struct pipe_rasterizer_state rs_state = { 0 };
    +   struct pipe_blend_state blend;
    +   struct pipe_depth_stencil_alpha_state dsa;
    +   struct pipe_rasterizer_state rs_state;
        struct pipe_sampler_state *sampler_state;
        struct pipe_vertex_element velem[2];
        unsigned i;
    @@ -134,12 +134,14 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
        ctx->blitter.saved_num_sampler_states = ~0;
     
        /* blend state objects */
    +   memset(&blend, 0, sizeof(blend));
        ctx->blend_keep_color = pipe->create_blend_state(pipe, &blend);
     
        blend.rt[0].colormask = PIPE_MASK_RGBA;
        ctx->blend_write_color = pipe->create_blend_state(pipe, &blend);
     
        /* depth stencil alpha state objects */
    +   memset(&dsa, 0, sizeof(dsa));
        ctx->dsa_keep_depth_stencil =
           pipe->create_depth_stencil_alpha_state(pipe, &dsa);
     
    diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
    index 94be682c4b1..e997cfa8a38 100644
    --- a/src/gallium/auxiliary/util/u_debug.c
    +++ b/src/gallium/auxiliary/util/u_debug.c
    @@ -421,26 +421,31 @@ void debug_dump_image(const char *prefix,
     #endif
     }
     
    -void debug_dump_surface(const char *prefix,
    +void debug_dump_surface(struct pipe_context *pipe,
    +			const char *prefix,
                             struct pipe_surface *surface)     
     {
        struct pipe_texture *texture;
    -   struct pipe_screen *screen;
        struct pipe_transfer *transfer;
        void *data;
     
        if (!surface)
           return;
     
    +   /* XXX: this doesn't necessarily work, as the driver may be using
    +    * temporary storage for the surface which hasn't been propagated
    +    * back into the texture.  Need to nail down the semantics of views
    +    * and transfers a bit better before we can say if extra work needs
    +    * to be done here:
    +    */
        texture = surface->texture;
    -   screen = texture->screen;
     
    -   transfer = screen->get_tex_transfer(screen, texture, surface->face,
    -                                       surface->level, surface->zslice,
    -                                       PIPE_TRANSFER_READ, 0, 0, surface->width,
    -                                       surface->height);
    +   transfer = pipe->get_tex_transfer(pipe, texture, surface->face,
    +				     surface->level, surface->zslice,
    +				     PIPE_TRANSFER_READ, 0, 0, surface->width,
    +				     surface->height);
        
    -   data = screen->transfer_map(screen, transfer);
    +   data = pipe->transfer_map(pipe, transfer);
        if(!data)
           goto error;
        
    @@ -452,13 +457,14 @@ void debug_dump_surface(const char *prefix,
                         transfer->stride,
                         data);
        
    -   screen->transfer_unmap(screen, transfer);
    +   pipe->transfer_unmap(pipe, transfer);
     error:
    -   screen->tex_transfer_destroy(transfer);
    +   pipe->tex_transfer_destroy(pipe, transfer);
     }
     
     
    -void debug_dump_texture(const char *prefix,
    +void debug_dump_texture(struct pipe_context *pipe,
    +                        const char *prefix,
                             struct pipe_texture *texture)
     {
        struct pipe_surface *surface;
    @@ -473,7 +479,7 @@ void debug_dump_texture(const char *prefix,
        surface = screen->get_tex_surface(screen, texture, 0, 0, 0,
                                          PIPE_TEXTURE_USAGE_SAMPLER);
        if (surface) {
    -      debug_dump_surface(prefix, surface);
    +      debug_dump_surface(pipe, prefix, surface);
           screen->tex_surface_destroy(surface);
        }
     }
    @@ -511,27 +517,28 @@ struct bmp_rgb_quad {
     };
     
     void
    -debug_dump_surface_bmp(const char *filename,
    +debug_dump_surface_bmp(struct pipe_context *pipe,
    +		       const char *filename,
                            struct pipe_surface *surface)
     {
     #ifndef PIPE_SUBSYSTEM_WINDOWS_MINIPORT
        struct pipe_transfer *transfer;
        struct pipe_texture *texture = surface->texture;
    -   struct pipe_screen *screen = texture->screen;
     
    -   transfer = screen->get_tex_transfer(screen, texture, surface->face,
    -                                       surface->level, surface->zslice,
    -                                       PIPE_TRANSFER_READ, 0, 0, surface->width,
    -                                       surface->height);
    +   transfer = pipe->get_tex_transfer(pipe, texture, surface->face,
    +				     surface->level, surface->zslice,
    +				     PIPE_TRANSFER_READ, 0, 0, surface->width,
    +				     surface->height);
     
    -   debug_dump_transfer_bmp(filename, transfer);
    +   debug_dump_transfer_bmp(pipe, filename, transfer);
     
    -   screen->tex_transfer_destroy(transfer);
    +   pipe->tex_transfer_destroy(pipe, transfer);
     #endif
     }
     
     void
    -debug_dump_transfer_bmp(const char *filename,
    +debug_dump_transfer_bmp(struct pipe_context *pipe,
    +                        const char *filename,
                             struct pipe_transfer *transfer)
     {
     #ifndef PIPE_SUBSYSTEM_WINDOWS_MINIPORT
    @@ -544,7 +551,7 @@ debug_dump_transfer_bmp(const char *filename,
        if(!rgba)
           goto error1;
     
    -   pipe_get_tile_rgba(transfer, 0, 0,
    +   pipe_get_tile_rgba(pipe, transfer, 0, 0,
                           transfer->width, transfer->height,
                           rgba);
     
    diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h
    index 0f4768f3444..98addeb372e 100644
    --- a/src/gallium/auxiliary/util/u_debug.h
    +++ b/src/gallium/auxiliary/util/u_debug.h
    @@ -312,6 +312,7 @@ debug_memory_end(unsigned long beginning);
     
     
     #ifdef DEBUG
    +struct pipe_context;
     struct pipe_surface;
     struct pipe_transfer;
     struct pipe_texture;
    @@ -321,21 +322,25 @@ void debug_dump_image(const char *prefix,
                           unsigned width, unsigned height,
                           unsigned stride,
                           const void *data);
    -void debug_dump_surface(const char *prefix,
    +void debug_dump_surface(struct pipe_context *pipe,
    +			const char *prefix,
                             struct pipe_surface *surface);   
    -void debug_dump_texture(const char *prefix,
    +void debug_dump_texture(struct pipe_context *pipe,
    +			const char *prefix,
                             struct pipe_texture *texture);
    -void debug_dump_surface_bmp(const char *filename,
    +void debug_dump_surface_bmp(struct pipe_context *pipe,
    +                            const char *filename,
                                 struct pipe_surface *surface);
    -void debug_dump_transfer_bmp(const char *filename,
    +void debug_dump_transfer_bmp(struct pipe_context *pipe,
    +                             const char *filename,
                                  struct pipe_transfer *transfer);
     void debug_dump_float_rgba_bmp(const char *filename,
                                    unsigned width, unsigned height,
                                    float *rgba, unsigned stride);
     #else
     #define debug_dump_image(prefix, format, cpp, width, height, stride, data) ((void)0)
    -#define debug_dump_surface(prefix, surface) ((void)0)
    -#define debug_dump_surface_bmp(filename, surface) ((void)0)
    -#define debug_dump_transfer_bmp(filename, transfer) ((void)0)
    +#define debug_dump_surface(pipe, prefix, surface) ((void)0)
    +#define debug_dump_surface_bmp(pipe, filename, surface) ((void)0)
    +#define debug_dump_transfer_bmp(pipe, filename, transfer) ((void)0)
     #define debug_dump_float_rgba_bmp(filename, width, height, rgba, stride) ((void)0)
     #endif
    diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
    index d421bee8efe..5c51b53d7bd 100644
    --- a/src/gallium/auxiliary/util/u_gen_mipmap.c
    +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
    @@ -1119,7 +1119,6 @@ make_1d_mipmap(struct gen_mipmap_state *ctx,
                    uint face, uint baseLevel, uint lastLevel)
     {
        struct pipe_context *pipe = ctx->pipe;
    -   struct pipe_screen *screen = pipe->screen;
        const uint zslice = 0;
        uint dstLevel;
     
    @@ -1128,27 +1127,27 @@ make_1d_mipmap(struct gen_mipmap_state *ctx,
           struct pipe_transfer *srcTrans, *dstTrans;
           void *srcMap, *dstMap;
           
    -      srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
    +      srcTrans = pipe->get_tex_transfer(pipe, pt, face, srcLevel, zslice,
                                               PIPE_TRANSFER_READ, 0, 0,
                                               u_minify(pt->width0, srcLevel),
                                               u_minify(pt->height0, srcLevel));
    -      dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
    +      dstTrans = pipe->get_tex_transfer(pipe, pt, face, dstLevel, zslice,
                                               PIPE_TRANSFER_WRITE, 0, 0,
                                               u_minify(pt->width0, dstLevel),
                                               u_minify(pt->height0, dstLevel));
     
    -      srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
    -      dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
    +      srcMap = (ubyte *) pipe->transfer_map(pipe, srcTrans);
    +      dstMap = (ubyte *) pipe->transfer_map(pipe, dstTrans);
     
           reduce_1d(pt->format,
                     srcTrans->width, srcMap,
                     dstTrans->width, dstMap);
     
    -      screen->transfer_unmap(screen, srcTrans);
    -      screen->transfer_unmap(screen, dstTrans);
    +      pipe->transfer_unmap(pipe, srcTrans);
    +      pipe->transfer_unmap(pipe, dstTrans);
     
    -      screen->tex_transfer_destroy(srcTrans);
    -      screen->tex_transfer_destroy(dstTrans);
    +      pipe->tex_transfer_destroy(pipe, srcTrans);
    +      pipe->tex_transfer_destroy(pipe, dstTrans);
        }
     }
     
    @@ -1159,7 +1158,6 @@ make_2d_mipmap(struct gen_mipmap_state *ctx,
                    uint face, uint baseLevel, uint lastLevel)
     {
        struct pipe_context *pipe = ctx->pipe;
    -   struct pipe_screen *screen = pipe->screen;
        const uint zslice = 0;
        uint dstLevel;
        
    @@ -1171,17 +1169,17 @@ make_2d_mipmap(struct gen_mipmap_state *ctx,
           struct pipe_transfer *srcTrans, *dstTrans;
           ubyte *srcMap, *dstMap;
           
    -      srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
    +      srcTrans = pipe->get_tex_transfer(pipe, pt, face, srcLevel, zslice,
                                               PIPE_TRANSFER_READ, 0, 0,
                                               u_minify(pt->width0, srcLevel),
                                               u_minify(pt->height0, srcLevel));
    -      dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
    +      dstTrans = pipe->get_tex_transfer(pipe, pt, face, dstLevel, zslice,
                                               PIPE_TRANSFER_WRITE, 0, 0,
                                               u_minify(pt->width0, dstLevel),
                                               u_minify(pt->height0, dstLevel));
     
    -      srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
    -      dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
    +      srcMap = (ubyte *) pipe->transfer_map(pipe, srcTrans);
    +      dstMap = (ubyte *) pipe->transfer_map(pipe, dstTrans);
     
           reduce_2d(pt->format,
                     srcTrans->width, srcTrans->height,
    @@ -1189,11 +1187,11 @@ make_2d_mipmap(struct gen_mipmap_state *ctx,
                     dstTrans->width, dstTrans->height,
                     dstTrans->stride, dstMap);
     
    -      screen->transfer_unmap(screen, srcTrans);
    -      screen->transfer_unmap(screen, dstTrans);
    +      pipe->transfer_unmap(pipe, srcTrans);
    +      pipe->transfer_unmap(pipe, dstTrans);
     
    -      screen->tex_transfer_destroy(srcTrans);
    -      screen->tex_transfer_destroy(dstTrans);
    +      pipe->tex_transfer_destroy(pipe, srcTrans);
    +      pipe->tex_transfer_destroy(pipe, dstTrans);
        }
     }
     
    @@ -1216,17 +1214,17 @@ make_3d_mipmap(struct gen_mipmap_state *ctx,
           struct pipe_transfer *srcTrans, *dstTrans;
           ubyte *srcMap, *dstMap;
           
    -      srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
    +      srcTrans = pipe->get_tex_transfer(pipe, pt, face, srcLevel, zslice,
                                               PIPE_TRANSFER_READ, 0, 0,
                                               u_minify(pt->width0, srcLevel),
                                               u_minify(pt->height0, srcLevel));
    -      dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
    +      dstTrans = pipe->get_tex_transfer(pipe, pt, face, dstLevel, zslice,
                                               PIPE_TRANSFER_WRITE, 0, 0,
                                               u_minify(pt->width0, dstLevel),
                                               u_minify(pt->height0, dstLevel));
     
    -      srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
    -      dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
    +      srcMap = (ubyte *) pipe->transfer_map(pipe, srcTrans);
    +      dstMap = (ubyte *) pipe->transfer_map(pipe, dstTrans);
     
           reduce_3d(pt->format,
                     srcTrans->width, srcTrans->height,
    @@ -1234,11 +1232,11 @@ make_3d_mipmap(struct gen_mipmap_state *ctx,
                     dstTrans->width, dstTrans->height,
                     dstTrans->stride, dstMap);
     
    -      screen->transfer_unmap(screen, srcTrans);
    -      screen->transfer_unmap(screen, dstTrans);
    +      pipe->transfer_unmap(pipe, srcTrans);
    +      pipe->transfer_unmap(pipe, dstTrans);
     
    -      screen->tex_transfer_destroy(srcTrans);
    -      screen->tex_transfer_destroy(dstTrans);
    +      pipe->tex_transfer_destroy(pipe, srcTrans);
    +      pipe->tex_transfer_destroy(pipe, dstTrans);
        }
     #else
        (void) reduce_3d;
    diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h
    index 84d849aa901..e22ab188e11 100644
    --- a/src/gallium/auxiliary/util/u_inlines.h
    +++ b/src/gallium/auxiliary/util/u_inlines.h
    @@ -274,24 +274,24 @@ pipe_buffer_read(struct pipe_screen *screen,
     }
     
     static INLINE void *
    -pipe_transfer_map( struct pipe_transfer *transf )
    +pipe_transfer_map( struct pipe_context *context,
    +                   struct pipe_transfer *transf )
     {
    -   struct pipe_screen *screen = transf->texture->screen;
    -   return screen->transfer_map(screen, transf);
    +   return context->transfer_map(context, transf);
     }
     
     static INLINE void
    -pipe_transfer_unmap( struct pipe_transfer *transf )
    +pipe_transfer_unmap( struct pipe_context *context,
    +                     struct pipe_transfer *transf )
     {
    -   struct pipe_screen *screen = transf->texture->screen;
    -   screen->transfer_unmap(screen, transf);
    +   context->transfer_unmap(context, transf);
     }
     
     static INLINE void
    -pipe_transfer_destroy( struct pipe_transfer *transf )
    +pipe_transfer_destroy( struct pipe_context *context,
    +                       struct pipe_transfer *transfer )
     {
    -   struct pipe_screen *screen = transf->texture->screen;
    -   screen->tex_transfer_destroy(transf);
    +   context->tex_transfer_destroy(context, transfer);
     }
     
     static INLINE unsigned
    diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c
    index 8479161c744..e73797f1b7e 100644
    --- a/src/gallium/auxiliary/util/u_rect.c
    +++ b/src/gallium/auxiliary/util/u_rect.c
    @@ -169,7 +169,6 @@ util_surface_copy(struct pipe_context *pipe,
                       unsigned src_x, unsigned src_y, 
                       unsigned w, unsigned h)
     {
    -   struct pipe_screen *screen = pipe->screen;
        struct pipe_transfer *src_trans, *dst_trans;
        void *dst_map;
        const void *src_map;
    @@ -182,7 +181,7 @@ util_surface_copy(struct pipe_context *pipe,
        src_format = src->texture->format;
        dst_format = dst->texture->format;
     
    -   src_trans = screen->get_tex_transfer(screen,
    +   src_trans = pipe->get_tex_transfer(pipe,
                                             src->texture,
                                             src->face,
                                             src->level,
    @@ -190,7 +189,7 @@ util_surface_copy(struct pipe_context *pipe,
                                             PIPE_TRANSFER_READ,
                                             src_x, src_y, w, h);
     
    -   dst_trans = screen->get_tex_transfer(screen,
    +   dst_trans = pipe->get_tex_transfer(pipe,
                                             dst->texture,
                                             dst->face,
                                             dst->level,
    @@ -202,8 +201,8 @@ util_surface_copy(struct pipe_context *pipe,
        assert(util_format_get_blockwidth(dst_format) == util_format_get_blockwidth(src_format));
        assert(util_format_get_blockheight(dst_format) == util_format_get_blockheight(src_format));
     
    -   src_map = pipe->screen->transfer_map(screen, src_trans);
    -   dst_map = pipe->screen->transfer_map(screen, dst_trans);
    +   src_map = pipe->transfer_map(pipe, src_trans);
    +   dst_map = pipe->transfer_map(pipe, dst_trans);
     
        assert(src_map);
        assert(dst_map);
    @@ -221,11 +220,11 @@ util_surface_copy(struct pipe_context *pipe,
                          do_flip ? h - 1 : 0);
        }
     
    -   pipe->screen->transfer_unmap(pipe->screen, src_trans);
    -   pipe->screen->transfer_unmap(pipe->screen, dst_trans);
    +   pipe->transfer_unmap(pipe, src_trans);
    +   pipe->transfer_unmap(pipe, dst_trans);
     
    -   screen->tex_transfer_destroy(src_trans);
    -   screen->tex_transfer_destroy(dst_trans);
    +   pipe->tex_transfer_destroy(pipe, src_trans);
    +   pipe->tex_transfer_destroy(pipe, dst_trans);
     }
     
     
    @@ -243,14 +242,13 @@ util_surface_fill(struct pipe_context *pipe,
                       unsigned dstx, unsigned dsty,
                       unsigned width, unsigned height, unsigned value)
     {
    -   struct pipe_screen *screen = pipe->screen;
        struct pipe_transfer *dst_trans;
        void *dst_map;
     
        assert(dst->texture);
        if (!dst->texture)
           return;
    -   dst_trans = screen->get_tex_transfer(screen,
    +   dst_trans = pipe->get_tex_transfer(pipe,
                                             dst->texture,
                                             dst->face,
                                             dst->level,
    @@ -258,7 +256,7 @@ util_surface_fill(struct pipe_context *pipe,
                                             PIPE_TRANSFER_WRITE,
                                             dstx, dsty, width, height);
     
    -   dst_map = pipe->screen->transfer_map(screen, dst_trans);
    +   dst_map = pipe->transfer_map(pipe, dst_trans);
     
        assert(dst_map);
     
    @@ -302,6 +300,6 @@ util_surface_fill(struct pipe_context *pipe,
           }
        }
     
    -   pipe->screen->transfer_unmap(pipe->screen, dst_trans);
    -   screen->tex_transfer_destroy(dst_trans);
    +   pipe->transfer_unmap(pipe, dst_trans);
    +   pipe->tex_transfer_destroy(pipe, dst_trans);
     }
    diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c
    index 8a36d4d9d18..82e44192aaf 100644
    --- a/src/gallium/auxiliary/util/u_tile.c
    +++ b/src/gallium/auxiliary/util/u_tile.c
    @@ -45,11 +45,11 @@
      * Move raw block of pixels from transfer object to user memory.
      */
     void
    -pipe_get_tile_raw(struct pipe_transfer *pt,
    +pipe_get_tile_raw(struct pipe_context *pipe,
    +                  struct pipe_transfer *pt,
                       uint x, uint y, uint w, uint h,
                       void *dst, int dst_stride)
     {
    -   struct pipe_screen *screen = pt->texture->screen;
        const void *src;
     
        if (dst_stride == 0)
    @@ -58,14 +58,14 @@ pipe_get_tile_raw(struct pipe_transfer *pt,
        if (pipe_clip_tile(x, y, &w, &h, pt))
           return;
     
    -   src = screen->transfer_map(screen, pt);
    +   src = pipe->transfer_map(pipe, pt);
        assert(src);
        if(!src)
           return;
     
        util_copy_rect(dst, pt->texture->format, dst_stride, 0, 0, w, h, src, pt->stride, x, y);
     
    -   screen->transfer_unmap(screen, pt);
    +   pipe->transfer_unmap(pipe, pt);
     }
     
     
    @@ -73,11 +73,11 @@ pipe_get_tile_raw(struct pipe_transfer *pt,
      * Move raw block of pixels from user memory to transfer object.
      */
     void
    -pipe_put_tile_raw(struct pipe_transfer *pt,
    +pipe_put_tile_raw(struct pipe_context *pipe,
    +                  struct pipe_transfer *pt,
                       uint x, uint y, uint w, uint h,
                       const void *src, int src_stride)
     {
    -   struct pipe_screen *screen = pt->texture->screen;
        void *dst;
        enum pipe_format format = pt->texture->format;
     
    @@ -87,14 +87,14 @@ pipe_put_tile_raw(struct pipe_transfer *pt,
        if (pipe_clip_tile(x, y, &w, &h, pt))
           return;
     
    -   dst = screen->transfer_map(screen, pt);
    +   dst = pipe->transfer_map(pipe, pt);
        assert(dst);
        if(!dst)
           return;
     
        util_copy_rect(dst, format, pt->stride, x, y, w, h, src, src_stride, 0, 0);
     
    -   screen->transfer_unmap(screen, pt);
    +   pipe->transfer_unmap(pipe, pt);
     }
     
     
    @@ -1246,7 +1246,8 @@ pipe_tile_raw_to_rgba(enum pipe_format format,
     
     
     void
    -pipe_get_tile_rgba(struct pipe_transfer *pt,
    +pipe_get_tile_rgba(struct pipe_context *pipe,
    +                   struct pipe_transfer *pt,
                        uint x, uint y, uint w, uint h,
                        float *p)
     {
    @@ -1265,7 +1266,7 @@ pipe_get_tile_rgba(struct pipe_transfer *pt,
        if(format == PIPE_FORMAT_UYVY || format == PIPE_FORMAT_YUYV)
           assert((x & 1) == 0);
     
    -   pipe_get_tile_raw(pt, x, y, w, h, packed, 0);
    +   pipe_get_tile_raw(pipe, pt, x, y, w, h, packed, 0);
     
        pipe_tile_raw_to_rgba(format, packed, w, h, p, dst_stride);
     
    @@ -1274,7 +1275,8 @@ pipe_get_tile_rgba(struct pipe_transfer *pt,
     
     
     void
    -pipe_get_tile_swizzle(struct pipe_transfer *pt,
    +pipe_get_tile_swizzle(struct pipe_context *pipe,
    +		      struct pipe_transfer *pt,
                           uint x,
                           uint y,
                           uint w,
    @@ -1304,7 +1306,7 @@ pipe_get_tile_swizzle(struct pipe_transfer *pt,
           assert((x & 1) == 0);
        }
     
    -   pipe_get_tile_raw(pt, x, y, w, h, packed, 0);
    +   pipe_get_tile_raw(pipe, pt, x, y, w, h, packed, 0);
     
        pipe_tile_raw_to_rgba(format, packed, w, h, p, dst_stride);
     
    @@ -1336,7 +1338,8 @@ pipe_get_tile_swizzle(struct pipe_transfer *pt,
     
     
     void
    -pipe_put_tile_rgba(struct pipe_transfer *pt,
    +pipe_put_tile_rgba(struct pipe_context *pipe,
    +                   struct pipe_transfer *pt,
                        uint x, uint y, uint w, uint h,
                        const float *p)
     {
    @@ -1425,7 +1428,7 @@ pipe_put_tile_rgba(struct pipe_transfer *pt,
                                0, 0, w, h);
        }
     
    -   pipe_put_tile_raw(pt, x, y, w, h, packed, 0);
    +   pipe_put_tile_raw(pipe, pt, x, y, w, h, packed, 0);
     
        FREE(packed);
     }
    @@ -1435,11 +1438,11 @@ pipe_put_tile_rgba(struct pipe_transfer *pt,
      * Get a block of Z values, converted to 32-bit range.
      */
     void
    -pipe_get_tile_z(struct pipe_transfer *pt,
    +pipe_get_tile_z(struct pipe_context *pipe,
    +                struct pipe_transfer *pt,
                     uint x, uint y, uint w, uint h,
                     uint *z)
     {
    -   struct pipe_screen *screen = pt->texture->screen;
        const uint dstStride = w;
        ubyte *map;
        uint *pDest = z;
    @@ -1449,7 +1452,7 @@ pipe_get_tile_z(struct pipe_transfer *pt,
        if (pipe_clip_tile(x, y, &w, &h, pt))
           return;
     
    -   map = (ubyte *)screen->transfer_map(screen, pt);
    +   map = (ubyte *)pipe->transfer_map(pipe, pt);
        if (!map) {
           assert(0);
           return;
    @@ -1515,16 +1518,16 @@ pipe_get_tile_z(struct pipe_transfer *pt,
           assert(0);
        }
     
    -   screen->transfer_unmap(screen, pt);
    +   pipe->transfer_unmap(pipe, pt);
     }
     
     
     void
    -pipe_put_tile_z(struct pipe_transfer *pt,
    +pipe_put_tile_z(struct pipe_context *pipe,
    +                struct pipe_transfer *pt,
                     uint x, uint y, uint w, uint h,
                     const uint *zSrc)
     {
    -   struct pipe_screen *screen = pt->texture->screen;
        const uint srcStride = w;
        const uint *ptrc = zSrc;
        ubyte *map;
    @@ -1534,7 +1537,7 @@ pipe_put_tile_z(struct pipe_transfer *pt,
        if (pipe_clip_tile(x, y, &w, &h, pt))
           return;
     
    -   map = (ubyte *)screen->transfer_map(screen, pt);
    +   map = (ubyte *)pipe->transfer_map(pipe, pt);
        if (!map) {
           assert(0);
           return;
    @@ -1622,7 +1625,7 @@ pipe_put_tile_z(struct pipe_transfer *pt,
           assert(0);
        }
     
    -   screen->transfer_unmap(screen, pt);
    +   pipe->transfer_unmap(pipe, pt);
     }
     
     
    diff --git a/src/gallium/auxiliary/util/u_tile.h b/src/gallium/auxiliary/util/u_tile.h
    index d665fdb1bb1..1d8ce7d8cbc 100644
    --- a/src/gallium/auxiliary/util/u_tile.h
    +++ b/src/gallium/auxiliary/util/u_tile.h
    @@ -56,23 +56,27 @@ extern "C" {
     #endif
     
     void
    -pipe_get_tile_raw(struct pipe_transfer *pt,
    +pipe_get_tile_raw(struct pipe_context *pipe,
    +                  struct pipe_transfer *pt,
                       uint x, uint y, uint w, uint h,
                       void *p, int dst_stride);
     
     void
    -pipe_put_tile_raw(struct pipe_transfer *pt,
    +pipe_put_tile_raw(struct pipe_context *pipe,
    +                  struct pipe_transfer *pt,
                       uint x, uint y, uint w, uint h,
                       const void *p, int src_stride);
     
     
     void
    -pipe_get_tile_rgba(struct pipe_transfer *pt,
    +pipe_get_tile_rgba(struct pipe_context *pipe,
    +                   struct pipe_transfer *pt,
                        uint x, uint y, uint w, uint h,
                        float *p);
     
     void
    -pipe_get_tile_swizzle(struct pipe_transfer *pt,
    +pipe_get_tile_swizzle(struct pipe_context *pipe,
    +		      struct pipe_transfer *pt,
                           uint x,
                           uint y,
                           uint w,
    @@ -85,18 +89,21 @@ pipe_get_tile_swizzle(struct pipe_transfer *pt,
                           float *p);
     
     void
    -pipe_put_tile_rgba(struct pipe_transfer *pt,
    +pipe_put_tile_rgba(struct pipe_context *pipe,
    +                   struct pipe_transfer *pt,
                        uint x, uint y, uint w, uint h,
                        const float *p);
     
     
     void
    -pipe_get_tile_z(struct pipe_transfer *pt,
    +pipe_get_tile_z(struct pipe_context *pipe,
    +                struct pipe_transfer *pt,
                     uint x, uint y, uint w, uint h,
                     uint *z);
     
     void
    -pipe_put_tile_z(struct pipe_transfer *pt,
    +pipe_put_tile_z(struct pipe_context *pipe,
    +                struct pipe_transfer *pt,
                     uint x, uint y, uint w, uint h,
                     const uint *z);
     
    diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
    index 0763b5bb0e4..beb4722901e 100644
    --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
    +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
    @@ -680,14 +680,14 @@ xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
        assert(r);
     
        for (i = 0; i < 3; ++i) {
    -      r->tex_transfer[i] = r->pipe->screen->get_tex_transfer
    +      r->tex_transfer[i] = r->pipe->get_tex_transfer
           (
    -         r->pipe->screen, r->textures.all[i],
    +         r->pipe, r->textures.all[i],
              0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0,
              r->textures.all[i]->width0, r->textures.all[i]->height0
           );
     
    -      r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]);
    +      r->texels[i] = r->pipe->transfer_map(r->pipe, r->tex_transfer[i]);
        }
     }
     
    @@ -699,8 +699,8 @@ xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r)
        assert(r);
     
        for (i = 0; i < 3; ++i) {
    -      r->pipe->screen->transfer_unmap(r->pipe->screen, r->tex_transfer[i]);
    -      r->pipe->screen->tex_transfer_destroy(r->tex_transfer[i]);
    +      r->pipe->transfer_unmap(r->pipe, r->tex_transfer[i]);
    +      r->pipe->tex_transfer_destroy(r->pipe, r->tex_transfer[i]);
        }
     }
     
    diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c
    index afcea616d59..f6cb1fc9be7 100644
    --- a/src/gallium/drivers/cell/ppu/cell_context.c
    +++ b/src/gallium/drivers/cell/ppu/cell_context.c
    @@ -158,6 +158,7 @@ cell_create_context(struct pipe_screen *screen,
        cell_init_shader_functions(cell);
        cell_init_surface_functions(cell);
        cell_init_vertex_functions(cell);
    +   cell_init_texture_transfer_funcs(cell);
     
        cell->draw = cell_draw_create(cell);
     
    diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
    index 576d514741d..c54576b3c32 100644
    --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
    +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
    @@ -1352,7 +1352,7 @@ gen_stencil_values(struct spe_function *f,
         */
        ASSERT(fbS_reg != newS_reg);
     
    -   /* The code also assumes the the stencil_max_value is of the form 
    +   /* The code also assumes that the stencil_max_value is of the form
         * 2^n-1 and can therefore be used as a mask for the valid bits in 
         * addition to a maximum.  Make sure this is the case as well.
         * The clever math below exploits the fact that incrementing a 
    diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c
    index 28e5e6d706d..39284f3a5d1 100644
    --- a/src/gallium/drivers/cell/ppu/cell_spu.c
    +++ b/src/gallium/drivers/cell/ppu/cell_spu.c
    @@ -135,7 +135,7 @@ cell_thread_function(void *arg)
     
     /**
      * Create the SPU threads.  This is done once during driver initialization.
    - * This involves setting the the "init" message which is sent to each SPU.
    + * This involves setting the "init" message which is sent to each SPU.
      * The init message specifies an SPU id, total number of SPUs, location
      * and number of batch buffers, etc.
      */
    diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
    index d3efb8ecea2..9510ea9ac2b 100644
    --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c
    +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c
    @@ -62,7 +62,8 @@ cell_bind_vertex_elements_state(struct pipe_context *pipe,
     
        cell->dirty |= CELL_NEW_VERTEX;
     
    -   draw_set_vertex_elements(cell->draw, cell_velems->count, cell_velems->velem);
    +   if (cell_velems)
    +      draw_set_vertex_elements(cell->draw, cell_velems->count, cell_velems->velem);
     }
     
     void
    diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c
    index c8a1acd86a9..c65c3b4f885 100644
    --- a/src/gallium/drivers/cell/ppu/cell_texture.c
    +++ b/src/gallium/drivers/cell/ppu/cell_texture.c
    @@ -355,7 +355,7 @@ cell_tex_surface_destroy(struct pipe_surface *surf)
      * back out for glGetTexImage).
      */
     static struct pipe_transfer *
    -cell_get_tex_transfer(struct pipe_screen *screen,
    +cell_get_tex_transfer(struct pipe_context *ctx,
                           struct pipe_texture *texture,
                           unsigned face, unsigned level, unsigned zslice,
                           enum pipe_transfer_usage usage,
    @@ -402,7 +402,7 @@ cell_get_tex_transfer(struct pipe_screen *screen,
     
     
     static void 
    -cell_tex_transfer_destroy(struct pipe_transfer *t)
    +cell_tex_transfer_destroy(struct pipe_context *ctx, struct pipe_transfer *t)
     {
        struct cell_transfer *transfer = cell_transfer(t);
        /* Effectively do the texture_update work here - if texture images
    @@ -419,7 +419,7 @@ cell_tex_transfer_destroy(struct pipe_transfer *t)
      * Return pointer to texture image data in linear layout.
      */
     static void *
    -cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer)
    +cell_transfer_map(struct pipe_context *ctx, struct pipe_transfer *transfer)
     {
        struct cell_transfer *ctrans = cell_transfer(transfer);
        struct pipe_texture *pt = transfer->texture;
    @@ -471,7 +471,7 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer)
      * to tiled data.
      */
     static void
    -cell_transfer_unmap(struct pipe_screen *screen,
    +cell_transfer_unmap(struct pipe_context *ctx,
                         struct pipe_transfer *transfer)
     {
        struct cell_transfer *ctrans = cell_transfer(transfer);
    @@ -560,11 +560,14 @@ cell_init_screen_texture_funcs(struct pipe_screen *screen)
        screen->get_tex_surface = cell_get_tex_surface;
        screen->tex_surface_destroy = cell_tex_surface_destroy;
     
    -   screen->get_tex_transfer = cell_get_tex_transfer;
    -   screen->tex_transfer_destroy = cell_tex_transfer_destroy;
    -
    -   screen->transfer_map = cell_transfer_map;
    -   screen->transfer_unmap = cell_transfer_unmap;
    -
        screen->flush_frontbuffer = cell_flush_frontbuffer;
     }
    +
    +void
    +cell_init_texture_transfer_funcs(struct cell_context *cell)
    +{
    +   cell->pipe.get_tex_transfer = cell_get_tex_transfer;
    +   cell->pipe.tex_transfer_destroy = cell_tex_transfer_destroy;
    +   cell->pipe.transfer_map = cell_transfer_map;
    +   cell->pipe.transfer_unmap = cell_transfer_unmap;
    +}
    diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h
    index 2be05793122..ac0b9167750 100644
    --- a/src/gallium/drivers/cell/ppu/cell_texture.h
    +++ b/src/gallium/drivers/cell/ppu/cell_texture.h
    @@ -95,5 +95,7 @@ cell_transfer(struct pipe_transfer *pt)
     extern void
     cell_init_screen_texture_funcs(struct pipe_screen *screen);
     
    +extern void
    +cell_init_texture_transfer_funcs(struct cell_context *cell);
     
     #endif /* CELL_TEXTURE_H */
    diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c
    index 3d45a22b7e7..130519ffa50 100644
    --- a/src/gallium/drivers/i915/i915_context.c
    +++ b/src/gallium/drivers/i915/i915_context.c
    @@ -221,6 +221,7 @@ i915_create_context(struct pipe_screen *screen, void *priv)
        i915_init_surface_functions(i915);
        i915_init_state_functions(i915);
        i915_init_flush_functions(i915);
    +   i915_init_texture_functions(i915);
     
        draw_install_aaline_stage(i915->draw, &i915->base);
        draw_install_aapoint_stage(i915->draw, &i915->base);
    diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
    index 9d03ca37fda..5348f62ca74 100644
    --- a/src/gallium/drivers/i915/i915_context.h
    +++ b/src/gallium/drivers/i915/i915_context.h
    @@ -349,6 +349,12 @@ struct pipe_context *i915_create_context(struct pipe_screen *screen,
     					 void *priv);
     
     
    +/***********************************************************************
    + * i915_texture.c
    + */
    +void i915_init_texture_functions(struct i915_context *i915 );
    +
    +
     /***********************************************************************
      * Inline conversion functions.  These are better-typed than the
      * macros used previously:
    diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c
    index e54997736f9..0f7395246cc 100644
    --- a/src/gallium/drivers/i915/i915_state.c
    +++ b/src/gallium/drivers/i915/i915_state.c
    @@ -797,7 +797,10 @@ i915_bind_vertex_elements_state(struct pipe_context *pipe,
        draw_flush(i915->draw);
     
        /* pass-through to draw module */
    -   draw_set_vertex_elements(i915->draw, i915_velems->count, i915_velems->velem);
    +   if (i915_velems) {
    +      draw_set_vertex_elements(i915->draw,
    +            i915_velems->count, i915_velems->velem);
    +   }
     }
     
     static void
    diff --git a/src/gallium/drivers/i915/i915_texture.c b/src/gallium/drivers/i915/i915_texture.c
    index 3ce52cdcdbd..b252fb5330c 100644
    --- a/src/gallium/drivers/i915/i915_texture.c
    +++ b/src/gallium/drivers/i915/i915_texture.c
    @@ -795,12 +795,12 @@ i915_tex_surface_destroy(struct pipe_surface *surf)
     
     
     /*
    - * Screen transfer functions
    + * Texture transfer functions
      */
     
     
    -static struct pipe_transfer*
    -i915_get_tex_transfer(struct pipe_screen *screen,
    +static struct pipe_transfer *
    +i915_get_tex_transfer(struct pipe_context *pipe,
                           struct pipe_texture *texture,
                           unsigned face, unsigned level, unsigned zslice,
                           enum pipe_transfer_usage usage, unsigned x, unsigned y,
    @@ -837,7 +837,7 @@ i915_get_tex_transfer(struct pipe_screen *screen,
     }
     
     static void *
    -i915_transfer_map(struct pipe_screen *screen,
    +i915_transfer_map(struct pipe_context *pipe,
                       struct pipe_transfer *transfer)
     {
        struct i915_texture *tex = (struct i915_texture *)transfer->texture;
    @@ -859,7 +859,7 @@ i915_transfer_map(struct pipe_screen *screen,
     }
     
     static void
    -i915_transfer_unmap(struct pipe_screen *screen,
    +i915_transfer_unmap(struct pipe_context *pipe,
                         struct pipe_transfer *transfer)
     {
        struct i915_texture *tex = (struct i915_texture *)transfer->texture;
    @@ -868,7 +868,8 @@ i915_transfer_unmap(struct pipe_screen *screen,
     }
     
     static void
    -i915_tex_transfer_destroy(struct pipe_transfer *trans)
    +i915_tex_transfer_destroy(struct pipe_context *pipe,
    +                          struct pipe_transfer *trans)
     {
        pipe_texture_reference(&trans->texture, NULL);
        FREE(trans);
    @@ -879,6 +880,14 @@ i915_tex_transfer_destroy(struct pipe_transfer *trans)
      * Other texture functions
      */
     
    +void
    +i915_init_texture_functions(struct i915_context *i915 )
    +{
    +   i915->base.get_tex_transfer = i915_get_tex_transfer;
    +   i915->base.transfer_map = i915_transfer_map;
    +   i915->base.transfer_unmap = i915_transfer_unmap;
    +   i915->base.tex_transfer_destroy = i915_tex_transfer_destroy;
    +}
     
     void
     i915_init_screen_texture_functions(struct i915_screen *is)
    @@ -889,8 +898,4 @@ i915_init_screen_texture_functions(struct i915_screen *is)
        is->base.texture_destroy = i915_texture_destroy;
        is->base.get_tex_surface = i915_get_tex_surface;
        is->base.tex_surface_destroy = i915_tex_surface_destroy;
    -   is->base.get_tex_transfer = i915_get_tex_transfer;
    -   is->base.transfer_map = i915_transfer_map;
    -   is->base.transfer_unmap = i915_transfer_unmap;
    -   is->base.tex_transfer_destroy = i915_tex_transfer_destroy;
     }
    diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h
    index e0f3cd2a9fe..e3a7c64d489 100644
    --- a/src/gallium/drivers/i965/brw_screen.h
    +++ b/src/gallium/drivers/i965/brw_screen.h
    @@ -181,6 +181,10 @@ void brw_update_texture( struct brw_screen *brw_screen,
     			 struct brw_texture *tex );
     
     
    +/* brw_screen_texture.c
    + */
    +struct brw_context;
    +void brw_tex_init( struct brw_context *brw );
     void brw_screen_tex_init( struct brw_screen *brw_screen );
     void brw_screen_tex_surface_init( struct brw_screen *brw_screen );
     
    diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
    index cc79bfc7715..cadcb7cee2a 100644
    --- a/src/gallium/drivers/i965/brw_screen_texture.c
    +++ b/src/gallium/drivers/i965/brw_screen_texture.c
    @@ -37,6 +37,8 @@
     #include "brw_defines.h"
     #include "brw_structs.h"
     #include "brw_winsys.h"
    +#include "brw_context.h"
    +
     
     
     
    @@ -479,7 +481,7 @@ boolean brw_is_texture_referenced_by_bo( struct brw_screen *brw_screen,
      */
     
     static struct pipe_transfer*
    -brw_get_tex_transfer(struct pipe_screen *screen,
    +brw_get_tex_transfer(struct pipe_context *pipe,
                          struct pipe_texture *texture,
                          unsigned face, unsigned level, unsigned zslice,
                          enum pipe_transfer_usage usage, unsigned x, unsigned y,
    @@ -514,11 +516,11 @@ brw_get_tex_transfer(struct pipe_screen *screen,
     }
     
     static void *
    -brw_transfer_map(struct pipe_screen *screen,
    +brw_transfer_map(struct pipe_context *pipe,
                      struct pipe_transfer *transfer)
     {
        struct brw_texture *tex = brw_texture(transfer->texture);
    -   struct brw_winsys_screen *sws = brw_screen(screen)->sws;
    +   struct brw_winsys_screen *sws = brw_screen(pipe->screen)->sws;
        char *map;
        unsigned usage = transfer->usage;
     
    @@ -541,23 +543,32 @@ brw_transfer_map(struct pipe_screen *screen,
     }
     
     static void
    -brw_transfer_unmap(struct pipe_screen *screen,
    +brw_transfer_unmap(struct pipe_context *pipe,
                        struct pipe_transfer *transfer)
     {
        struct brw_texture *tex = brw_texture(transfer->texture);
    -   struct brw_winsys_screen *sws = brw_screen(screen)->sws;
    +   struct brw_winsys_screen *sws = brw_screen(pipe->screen)->sws;
     
        sws->bo_unmap(tex->bo);
     }
     
     static void
    -brw_tex_transfer_destroy(struct pipe_transfer *trans)
    +brw_tex_transfer_destroy(struct pipe_context *pipe,
    +                         struct pipe_transfer *trans)
     {
        pipe_texture_reference(&trans->texture, NULL);
        FREE(trans);
     }
     
     
    +void brw_tex_init( struct brw_context *brw )
    +{
    +   brw->base.get_tex_transfer = brw_get_tex_transfer;
    +   brw->base.transfer_map = brw_transfer_map;
    +   brw->base.transfer_unmap = brw_transfer_unmap;
    +   brw->base.tex_transfer_destroy = brw_tex_transfer_destroy;
    +}
    +
     void brw_screen_tex_init( struct brw_screen *brw_screen )
     {
        brw_screen->base.is_format_supported = brw_is_format_supported;
    @@ -565,8 +576,4 @@ void brw_screen_tex_init( struct brw_screen *brw_screen )
        brw_screen->base.texture_from_handle = brw_texture_from_handle;
        brw_screen->base.texture_get_handle = brw_texture_get_handle;
        brw_screen->base.texture_destroy = brw_texture_destroy;
    -   brw_screen->base.get_tex_transfer = brw_get_tex_transfer;
    -   brw_screen->base.transfer_map = brw_transfer_map;
    -   brw_screen->base.transfer_unmap = brw_transfer_unmap;
    -   brw_screen->base.tex_transfer_destroy = brw_tex_transfer_destroy;
     }
    diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c
    index ef5b428161c..00a542215ad 100644
    --- a/src/gallium/drivers/identity/id_context.c
    +++ b/src/gallium/drivers/identity/id_context.c
    @@ -747,6 +747,75 @@ identity_sampler_view_destroy(struct pipe_context *pipe,
        free(view);
     }
     
    +
    +static struct pipe_transfer *
    +identity_context_get_tex_transfer(struct pipe_context *_context,
    +				  struct pipe_texture *_texture,
    +                                 unsigned face,
    +                                 unsigned level,
    +                                 unsigned zslice,
    +                                 enum pipe_transfer_usage usage,
    +                                 unsigned x,
    +                                 unsigned y,
    +                                 unsigned w,
    +                                 unsigned h)
    +{
    +   struct identity_context *id_context = identity_context(_context);
    +   struct identity_texture *id_texture = identity_texture(_texture);
    +   struct pipe_context *context = id_context->pipe;
    +   struct pipe_texture *texture = id_texture->texture;
    +   struct pipe_transfer *result;
    +
    +   result = context->get_tex_transfer(context,
    +                                     texture,
    +                                     face,
    +                                     level,
    +                                     zslice,
    +                                     usage,
    +                                     x,
    +                                     y,
    +                                     w,
    +                                     h);
    +
    +   if (result)
    +      return identity_transfer_create(id_context, id_texture, result);
    +   return NULL;
    +}
    +
    +static void
    +identity_context_tex_transfer_destroy(struct pipe_context *_pipe,
    +                                      struct pipe_transfer *_transfer)
    +{
    +   identity_transfer_destroy(identity_context(_pipe),
    +                             identity_transfer(_transfer));
    +}
    +
    +static void *
    +identity_context_transfer_map(struct pipe_context *_context,
    +                             struct pipe_transfer *_transfer)
    +{
    +   struct identity_context *id_context = identity_context(_context);
    +   struct identity_transfer *id_transfer = identity_transfer(_transfer);
    +   struct pipe_context *context = id_context->pipe;
    +   struct pipe_transfer *transfer = id_transfer->transfer;
    +
    +   return context->transfer_map(context,
    +				transfer);
    +}
    +
    +static void
    +identity_context_transfer_unmap(struct pipe_context *_context,
    +                               struct pipe_transfer *_transfer)
    +{
    +   struct identity_context *id_context = identity_context(_context);
    +   struct identity_transfer *id_transfer = identity_transfer(_transfer);
    +   struct pipe_context *context = id_context->pipe;
    +   struct pipe_transfer *transfer = id_transfer->transfer;
    +
    +   context->transfer_unmap(context,
    +                          transfer);
    +}
    +
     struct pipe_context *
     identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
     {
    @@ -813,6 +882,10 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
        id_pipe->base.is_buffer_referenced = identity_is_buffer_referenced;
        id_pipe->base.create_sampler_view = identity_create_sampler_view;
        id_pipe->base.sampler_view_destroy = identity_sampler_view_destroy;
    +   id_pipe->base.get_tex_transfer = identity_context_get_tex_transfer;
    +   id_pipe->base.tex_transfer_destroy = identity_context_tex_transfer_destroy;
    +   id_pipe->base.transfer_map = identity_context_transfer_map;
    +   id_pipe->base.transfer_unmap = identity_context_transfer_unmap;
     
        id_pipe->pipe = pipe;
     
    diff --git a/src/gallium/drivers/identity/id_objects.c b/src/gallium/drivers/identity/id_objects.c
    index 2b1a60c1bf1..d37fb0042e5 100644
    --- a/src/gallium/drivers/identity/id_objects.c
    +++ b/src/gallium/drivers/identity/id_objects.c
    @@ -30,6 +30,7 @@
     
     #include "id_screen.h"
     #include "id_objects.h"
    +#include "id_context.h"
     
     struct pipe_buffer *
     identity_buffer_create(struct identity_screen *id_screen,
    @@ -142,7 +143,8 @@ identity_surface_destroy(struct identity_surface *id_surface)
     
     
     struct pipe_transfer *
    -identity_transfer_create(struct identity_texture *id_texture,
    +identity_transfer_create(struct identity_context *id_context,
    +			 struct identity_texture *id_texture,
                              struct pipe_transfer *transfer)
     {
        struct identity_transfer *id_transfer;
    @@ -159,25 +161,25 @@ identity_transfer_create(struct identity_texture *id_texture,
        memcpy(&id_transfer->base, transfer, sizeof(struct pipe_transfer));
     
        id_transfer->base.texture = NULL;
    -   pipe_texture_reference(&id_transfer->base.texture, &id_texture->base);
        id_transfer->transfer = transfer;
    +
    +   pipe_texture_reference(&id_transfer->base.texture, &id_texture->base);
        assert(id_transfer->base.texture == &id_texture->base);
     
        return &id_transfer->base;
     
     error:
    -   transfer->texture->screen->tex_transfer_destroy(transfer);
    +   id_context->pipe->tex_transfer_destroy(id_context->pipe, transfer);
        return NULL;
     }
     
     void
    -identity_transfer_destroy(struct identity_transfer *id_transfer)
    +identity_transfer_destroy(struct identity_context *id_context,
    +                          struct identity_transfer *id_transfer)
     {
    -   struct identity_screen *id_screen = identity_screen(id_transfer->base.texture->screen);
    -   struct pipe_screen *screen = id_screen->screen;
    -
        pipe_texture_reference(&id_transfer->base.texture, NULL);
    -   screen->tex_transfer_destroy(id_transfer->transfer);
    +   id_context->pipe->tex_transfer_destroy(id_context->pipe,
    +                                          id_transfer->transfer);
        FREE(id_transfer);
     }
     
    diff --git a/src/gallium/drivers/identity/id_objects.h b/src/gallium/drivers/identity/id_objects.h
    index b48df83b3ed..9a07ebe8d72 100644
    --- a/src/gallium/drivers/identity/id_objects.h
    +++ b/src/gallium/drivers/identity/id_objects.h
    @@ -35,6 +35,7 @@
     
     #include "id_screen.h"
     
    +struct identity_context;
     
     struct identity_buffer
     {
    @@ -72,6 +73,7 @@ struct identity_transfer
     {
        struct pipe_transfer base;
     
    +   struct pipe_context *pipe;
        struct pipe_transfer *transfer;
     };
     
    @@ -203,11 +205,13 @@ void
     identity_surface_destroy(struct identity_surface *id_surface);
     
     struct pipe_transfer *
    -identity_transfer_create(struct identity_texture *id_texture,
    +identity_transfer_create(struct identity_context *id_context,
    +			 struct identity_texture *id_texture,
                              struct pipe_transfer *transfer);
     
     void
    -identity_transfer_destroy(struct identity_transfer *id_transfer);
    +identity_transfer_destroy(struct identity_context *id_context,
    +                          struct identity_transfer *id_transfer);
     
     struct pipe_video_surface *
     identity_video_surface_create(struct identity_screen *id_screen,
    diff --git a/src/gallium/drivers/identity/id_screen.c b/src/gallium/drivers/identity/id_screen.c
    index b9d0f003d74..419b1465787 100644
    --- a/src/gallium/drivers/identity/id_screen.c
    +++ b/src/gallium/drivers/identity/id_screen.c
    @@ -207,71 +207,6 @@ identity_screen_tex_surface_destroy(struct pipe_surface *_surface)
        identity_surface_destroy(identity_surface(_surface));
     }
     
    -static struct pipe_transfer *
    -identity_screen_get_tex_transfer(struct pipe_screen *_screen,
    -                                 struct pipe_texture *_texture,
    -                                 unsigned face,
    -                                 unsigned level,
    -                                 unsigned zslice,
    -                                 enum pipe_transfer_usage usage,
    -                                 unsigned x,
    -                                 unsigned y,
    -                                 unsigned w,
    -                                 unsigned h)
    -{
    -   struct identity_screen *id_screen = identity_screen(_screen);
    -   struct identity_texture *id_texture = identity_texture(_texture);
    -   struct pipe_screen *screen = id_screen->screen;
    -   struct pipe_texture *texture = id_texture->texture;
    -   struct pipe_transfer *result;
    -
    -   result = screen->get_tex_transfer(screen,
    -                                     texture,
    -                                     face,
    -                                     level,
    -                                     zslice,
    -                                     usage,
    -                                     x,
    -                                     y,
    -                                     w,
    -                                     h);
    -
    -   if (result)
    -      return identity_transfer_create(id_texture, result);
    -   return NULL;
    -}
    -
    -static void
    -identity_screen_tex_transfer_destroy(struct pipe_transfer *_transfer)
    -{
    -   identity_transfer_destroy(identity_transfer(_transfer));
    -}
    -
    -static void *
    -identity_screen_transfer_map(struct pipe_screen *_screen,
    -                             struct pipe_transfer *_transfer)
    -{
    -   struct identity_screen *id_screen = identity_screen(_screen);
    -   struct identity_transfer *id_transfer = identity_transfer(_transfer);
    -   struct pipe_screen *screen = id_screen->screen;
    -   struct pipe_transfer *transfer = id_transfer->transfer;
    -
    -   return screen->transfer_map(screen,
    -                               transfer);
    -}
    -
    -static void
    -identity_screen_transfer_unmap(struct pipe_screen *_screen,
    -                               struct pipe_transfer *_transfer)
    -{
    -   struct identity_screen *id_screen = identity_screen(_screen);
    -   struct identity_transfer *id_transfer = identity_transfer(_transfer);
    -   struct pipe_screen *screen = id_screen->screen;
    -   struct pipe_transfer *transfer = id_transfer->transfer;
    -
    -   screen->transfer_unmap(screen,
    -                          transfer);
    -}
     
     static struct pipe_buffer *
     identity_screen_buffer_create(struct pipe_screen *_screen,
    @@ -488,10 +423,6 @@ identity_screen_create(struct pipe_screen *screen)
        id_screen->base.texture_destroy = identity_screen_texture_destroy;
        id_screen->base.get_tex_surface = identity_screen_get_tex_surface;
        id_screen->base.tex_surface_destroy = identity_screen_tex_surface_destroy;
    -   id_screen->base.get_tex_transfer = identity_screen_get_tex_transfer;
    -   id_screen->base.tex_transfer_destroy = identity_screen_tex_transfer_destroy;
    -   id_screen->base.transfer_map = identity_screen_transfer_map;
    -   id_screen->base.transfer_unmap = identity_screen_transfer_unmap;
        id_screen->base.buffer_create = identity_screen_buffer_create;
        id_screen->base.user_buffer_create = identity_screen_user_buffer_create;
        if (screen->buffer_map)
    diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
    index 41ac1cee72d..89c06ea3ad7 100644
    --- a/src/gallium/drivers/llvmpipe/Makefile
    +++ b/src/gallium/drivers/llvmpipe/Makefile
    @@ -55,7 +55,7 @@ testprogs := lp_test_format	\
     
     LIBS += $(GL_LIB_DEPS) -L. -lllvmpipe -L../../auxiliary/ -lgallium
     
    -$(testprogs): lp_test_% : lp_test_%.o lp_test_main.o libllvmpipe.a
    -	$(LD) $(filter %.o,$^) -o $@ -Wl,--start-group  $(LIBS) -Wl,--end-group
    +#$(testprogs): lp_test_% : lp_test_%.o lp_test_main.o libllvmpipe.a
    +#	$(LD) $(filter %.o,$^) -o $@ -Wl,--start-group  $(LIBS) -Wl,--end-group
     
    -default: $(testprogs)
    +#default: $(testprogs)
    diff --git a/src/gallium/drivers/llvmpipe/lp_buffer.c b/src/gallium/drivers/llvmpipe/lp_buffer.c
    index dab20cb6397..6e0f37393e9 100644
    --- a/src/gallium/drivers/llvmpipe/lp_buffer.c
    +++ b/src/gallium/drivers/llvmpipe/lp_buffer.c
    @@ -33,7 +33,6 @@
     #include "lp_screen.h"
     #include "lp_buffer.h"
     
    -#include "state_tracker/sw_winsys.h"
     
     static void *
     llvmpipe_buffer_map(struct pipe_screen *screen,
    diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
    index bf7ed118095..951a695f964 100644
    --- a/src/gallium/drivers/llvmpipe/lp_context.c
    +++ b/src/gallium/drivers/llvmpipe/lp_context.c
    @@ -175,6 +175,7 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv )
        llvmpipe->pipe.is_buffer_referenced = llvmpipe_is_buffer_referenced;
     
        llvmpipe_init_query_funcs( llvmpipe );
    +   llvmpipe_init_context_texture_funcs( &llvmpipe->pipe );
     
        /*
         * Create drawing context and plug our rendering stage into it.
    diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
    index f83e5ffacf8..71f991049e5 100644
    --- a/src/gallium/drivers/llvmpipe/lp_context.h
    +++ b/src/gallium/drivers/llvmpipe/lp_context.h
    @@ -45,7 +45,7 @@ struct draw_stage;
     struct lp_fragment_shader;
     struct lp_vertex_shader;
     struct lp_blend_state;
    -struct setup_context;
    +struct lp_setup_context;
     struct lp_velems_state;
     
     struct llvmpipe_context {
    @@ -98,7 +98,7 @@ struct llvmpipe_context {
        int psize_slot;
     
        /** The tiling engine */
    -   struct setup_context *setup;
    +   struct lp_setup_context *setup;
     
        /** The primitive drawing context */
        struct draw_context *draw;
    diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c
    index bf832433be1..1b4e8899359 100644
    --- a/src/gallium/drivers/llvmpipe/lp_flush.c
    +++ b/src/gallium/drivers/llvmpipe/lp_flush.c
    @@ -79,12 +79,12 @@ llvmpipe_flush( struct pipe_context *pipe,
     
           for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) {
     	 util_snprintf(filename, sizeof(filename), "cbuf%u_%u", i, frame_no);
    -         debug_dump_surface(filename, llvmpipe->framebuffer.cbufs[i]);
    +         debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.cbufs[i]);
           }
     
           if (0) {
              util_snprintf(filename, sizeof(filename), "zsbuf_%u", frame_no);
    -         debug_dump_surface(filename, llvmpipe->framebuffer.zsbuf);
    +         debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.zsbuf);
           }
     
           ++frame_no;
    diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
    index 08c8f93794d..5887613120d 100644
    --- a/src/gallium/drivers/llvmpipe/lp_jit.c
    +++ b/src/gallium/drivers/llvmpipe/lp_jit.c
    @@ -57,7 +57,8 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
           elem_types[LP_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
           elem_types[LP_JIT_TEXTURE_DEPTH] = LLVMInt32Type();
           elem_types[LP_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type();
    -      elem_types[LP_JIT_TEXTURE_STRIDE] = LLVMInt32Type();
    +      elem_types[LP_JIT_TEXTURE_ROW_STRIDE] =
    +         LLVMArrayType(LLVMInt32Type(), LP_MAX_TEXTURE_2D_LEVELS);
           elem_types[LP_JIT_TEXTURE_DATA] =
              LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0),
                            LP_MAX_TEXTURE_2D_LEVELS);
    @@ -76,9 +77,9 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
           LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, last_level,
                                  screen->target, texture_type,
                                  LP_JIT_TEXTURE_LAST_LEVEL);
    -      LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, stride,
    +      LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, row_stride,
                                  screen->target, texture_type,
    -                             LP_JIT_TEXTURE_STRIDE);
    +                             LP_JIT_TEXTURE_ROW_STRIDE);
           LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, data,
                                  screen->target, texture_type,
                                  LP_JIT_TEXTURE_DATA);
    diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
    index 5cc7a12c03b..13167ae3bf4 100644
    --- a/src/gallium/drivers/llvmpipe/lp_jit.h
    +++ b/src/gallium/drivers/llvmpipe/lp_jit.h
    @@ -51,7 +51,7 @@ struct lp_jit_texture
        uint32_t height;
        uint32_t depth;
        uint32_t last_level;
    -   uint32_t stride;
    +   uint32_t row_stride[LP_MAX_TEXTURE_2D_LEVELS];
        const void *data[LP_MAX_TEXTURE_2D_LEVELS];
     };
     
    @@ -61,7 +61,7 @@ enum {
        LP_JIT_TEXTURE_HEIGHT,
        LP_JIT_TEXTURE_DEPTH,
        LP_JIT_TEXTURE_LAST_LEVEL,
    -   LP_JIT_TEXTURE_STRIDE,
    +   LP_JIT_TEXTURE_ROW_STRIDE,
        LP_JIT_TEXTURE_DATA
     };
     
    diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
    index dc5fc5fc7d6..303f6e3f7e4 100644
    --- a/src/gallium/drivers/llvmpipe/lp_rast.h
    +++ b/src/gallium/drivers/llvmpipe/lp_rast.h
    @@ -95,7 +95,7 @@ struct lp_rast_shader_inputs {
      * Rasterization information for a triangle known to be in this bin,
      * plus inputs to run the shader:
      * These fields are tile- and bin-independent.
    - * Objects of this type are put into the setup_context::data buffer.
    + * Objects of this type are put into the lp_setup_context::data buffer.
      */
     struct lp_rast_triangle {
     #ifdef DEBUG
    diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
    index 72492c0f0ca..505cb21503a 100644
    --- a/src/gallium/drivers/llvmpipe/lp_scene.c
    +++ b/src/gallium/drivers/llvmpipe/lp_scene.c
    @@ -397,7 +397,7 @@ end:
     static boolean
     lp_scene_map_buffers( struct lp_scene *scene )
     {
    -   struct pipe_screen *screen = scene->pipe->screen;
    +   struct pipe_context *pipe = scene->pipe;
        struct pipe_surface *cbuf, *zsbuf;
        int i;
     
    @@ -409,7 +409,7 @@ lp_scene_map_buffers( struct lp_scene *scene )
        for (i = 0; i < scene->fb.nr_cbufs; i++) {
           cbuf = scene->fb.cbufs[i];
           if (cbuf) {
    -	 scene->cbuf_transfer[i] = screen->get_tex_transfer(screen,
    +	 scene->cbuf_transfer[i] = pipe->get_tex_transfer(pipe,
                                                               cbuf->texture,
                                                               cbuf->face,
                                                               cbuf->level,
    @@ -421,7 +421,7 @@ lp_scene_map_buffers( struct lp_scene *scene )
     	 if (!scene->cbuf_transfer[i])
     	    goto fail;
     
    -	 scene->cbuf_map[i] = screen->transfer_map(screen, 
    +	 scene->cbuf_map[i] = pipe->transfer_map(pipe, 
                                                      scene->cbuf_transfer[i]);
     	 if (!scene->cbuf_map[i])
     	    goto fail;
    @@ -432,7 +432,7 @@ lp_scene_map_buffers( struct lp_scene *scene )
         */
        zsbuf = scene->fb.zsbuf;
        if (zsbuf) {
    -      scene->zsbuf_transfer = screen->get_tex_transfer(screen,
    +      scene->zsbuf_transfer = pipe->get_tex_transfer(pipe,
                                                            zsbuf->texture,
                                                            zsbuf->face,
                                                            zsbuf->level,
    @@ -444,7 +444,7 @@ lp_scene_map_buffers( struct lp_scene *scene )
           if (!scene->zsbuf_transfer)
              goto fail;
     
    -      scene->zsbuf_map = screen->transfer_map(screen, 
    +      scene->zsbuf_map = pipe->transfer_map(pipe, 
                                                   scene->zsbuf_transfer);
           if (!scene->zsbuf_map)
     	 goto fail;
    @@ -469,25 +469,25 @@ fail:
     static void
     lp_scene_unmap_buffers( struct lp_scene *scene )
     {
    -   struct pipe_screen *screen = scene->pipe->screen;
    +   struct pipe_context *pipe = scene->pipe;
        unsigned i;
     
        for (i = 0; i < scene->fb.nr_cbufs; i++) {
           if (scene->cbuf_map[i]) 
    -	 screen->transfer_unmap(screen, scene->cbuf_transfer[i]);
    +	 pipe->transfer_unmap(pipe, scene->cbuf_transfer[i]);
     
           if (scene->cbuf_transfer[i])
    -	 screen->tex_transfer_destroy(scene->cbuf_transfer[i]);
    +	 pipe->tex_transfer_destroy(pipe, scene->cbuf_transfer[i]);
     
           scene->cbuf_transfer[i] = NULL;
           scene->cbuf_map[i] = NULL;
        }
     
        if (scene->zsbuf_map) 
    -      screen->transfer_unmap(screen, scene->zsbuf_transfer);
    +      pipe->transfer_unmap(pipe, scene->zsbuf_transfer);
     
        if (scene->zsbuf_transfer)
    -      screen->tex_transfer_destroy(scene->zsbuf_transfer);
    +      pipe->tex_transfer_destroy(pipe, scene->zsbuf_transfer);
     
        scene->zsbuf_transfer = NULL;
        scene->zsbuf_map = NULL;
    diff --git a/src/gallium/drivers/llvmpipe/lp_screen.h b/src/gallium/drivers/llvmpipe/lp_screen.h
    index f4e62cbf08e..d977f98cfaa 100644
    --- a/src/gallium/drivers/llvmpipe/lp_screen.h
    +++ b/src/gallium/drivers/llvmpipe/lp_screen.h
    @@ -34,9 +34,7 @@
     #ifndef LP_SCREEN_H
     #define LP_SCREEN_H
     
    -#include <llvm-c/Core.h>
    -#include <llvm-c/Analysis.h>
    -#include <llvm-c/Target.h>
    +#include "os/os_llvm.h"
     #include <llvm-c/ExecutionEngine.h>
     
     #include "pipe/p_screen.h"
    diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
    index 059584f2a77..cd16b6b2d38 100644
    --- a/src/gallium/drivers/llvmpipe/lp_setup.c
    +++ b/src/gallium/drivers/llvmpipe/lp_setup.c
    @@ -52,11 +52,11 @@
     #include "draw/draw_vbuf.h"
     
     
    -static void set_scene_state( struct setup_context *, unsigned );
    +static void set_scene_state( struct lp_setup_context *, unsigned );
     
     
     struct lp_scene *
    -lp_setup_get_current_scene(struct setup_context *setup)
    +lp_setup_get_current_scene(struct lp_setup_context *setup)
     {
        if (!setup->scene) {
     
    @@ -74,7 +74,7 @@ lp_setup_get_current_scene(struct setup_context *setup)
     
     
     static void
    -first_triangle( struct setup_context *setup,
    +first_triangle( struct lp_setup_context *setup,
                     const float (*v0)[4],
                     const float (*v1)[4],
                     const float (*v2)[4])
    @@ -85,7 +85,7 @@ first_triangle( struct setup_context *setup,
     }
     
     static void
    -first_line( struct setup_context *setup,
    +first_line( struct lp_setup_context *setup,
     	    const float (*v0)[4],
     	    const float (*v1)[4])
     {
    @@ -95,7 +95,7 @@ first_line( struct setup_context *setup,
     }
     
     static void
    -first_point( struct setup_context *setup,
    +first_point( struct lp_setup_context *setup,
     	     const float (*v0)[4])
     {
        set_scene_state( setup, SETUP_ACTIVE );
    @@ -103,7 +103,7 @@ first_point( struct setup_context *setup,
        setup->point( setup, v0 );
     }
     
    -static void reset_context( struct setup_context *setup )
    +static void reset_context( struct lp_setup_context *setup )
     {
        LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
     
    @@ -131,7 +131,7 @@ static void reset_context( struct setup_context *setup )
     
     /** Rasterize all scene's bins */
     static void
    -lp_setup_rasterize_scene( struct setup_context *setup,
    +lp_setup_rasterize_scene( struct lp_setup_context *setup,
                               boolean write_depth )
     {
        struct lp_scene *scene = lp_setup_get_current_scene(setup);
    @@ -148,7 +148,7 @@ lp_setup_rasterize_scene( struct setup_context *setup,
     
     
     static void
    -begin_binning( struct setup_context *setup )
    +begin_binning( struct lp_setup_context *setup )
     {
        struct lp_scene *scene = lp_setup_get_current_scene(setup);
     
    @@ -184,7 +184,7 @@ begin_binning( struct setup_context *setup )
      * TODO: fast path for fullscreen clears and no triangles.
      */
     static void
    -execute_clears( struct setup_context *setup )
    +execute_clears( struct lp_setup_context *setup )
     {
        LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
     
    @@ -194,7 +194,7 @@ execute_clears( struct setup_context *setup )
     
     
     static void
    -set_scene_state( struct setup_context *setup,
    +set_scene_state( struct lp_setup_context *setup,
                unsigned new_state )
     {
        unsigned old_state = setup->state;
    @@ -229,7 +229,7 @@ set_scene_state( struct setup_context *setup,
     
     
     void
    -lp_setup_flush( struct setup_context *setup,
    +lp_setup_flush( struct lp_setup_context *setup,
                     unsigned flags )
     {
        LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
    @@ -239,7 +239,7 @@ lp_setup_flush( struct setup_context *setup,
     
     
     void
    -lp_setup_bind_framebuffer( struct setup_context *setup,
    +lp_setup_bind_framebuffer( struct lp_setup_context *setup,
                                const struct pipe_framebuffer_state *fb )
     {
        LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
    @@ -256,7 +256,7 @@ lp_setup_bind_framebuffer( struct setup_context *setup,
     
     
     void
    -lp_setup_clear( struct setup_context *setup,
    +lp_setup_clear( struct lp_setup_context *setup,
                     const float *color,
                     double depth,
                     unsigned stencil,
    @@ -314,7 +314,7 @@ lp_setup_clear( struct setup_context *setup,
      * Emit a fence.
      */
     struct pipe_fence_handle *
    -lp_setup_fence( struct setup_context *setup )
    +lp_setup_fence( struct lp_setup_context *setup )
     {
        struct lp_scene *scene = lp_setup_get_current_scene(setup);
        const unsigned rank = lp_scene_get_num_bins( scene ); /* xxx */
    @@ -334,10 +334,11 @@ lp_setup_fence( struct setup_context *setup )
     
     
     void 
    -lp_setup_set_triangle_state( struct setup_context *setup,
    +lp_setup_set_triangle_state( struct lp_setup_context *setup,
                                  unsigned cull_mode,
                                  boolean ccw_is_frontface,
    -                             boolean scissor )
    +                             boolean scissor,
    +                             boolean gl_rasterization_rules)
     {
        LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
     
    @@ -345,12 +346,13 @@ lp_setup_set_triangle_state( struct setup_context *setup,
        setup->cullmode = cull_mode;
        setup->triangle = first_triangle;
        setup->scissor_test = scissor;
    +   setup->pixel_offset = gl_rasterization_rules ? 0.5f : 0.0f;
     }
     
     
     
     void
    -lp_setup_set_fs_inputs( struct setup_context *setup,
    +lp_setup_set_fs_inputs( struct lp_setup_context *setup,
                             const struct lp_shader_input *input,
                             unsigned nr )
     {
    @@ -361,7 +363,7 @@ lp_setup_set_fs_inputs( struct setup_context *setup,
     }
     
     void
    -lp_setup_set_fs_functions( struct setup_context *setup,
    +lp_setup_set_fs_functions( struct lp_setup_context *setup,
                                lp_jit_frag_func jit_function0,
                                lp_jit_frag_func jit_function1,
                                boolean opaque )
    @@ -376,7 +378,7 @@ lp_setup_set_fs_functions( struct setup_context *setup,
     }
     
     void
    -lp_setup_set_fs_constants(struct setup_context *setup,
    +lp_setup_set_fs_constants(struct lp_setup_context *setup,
                               struct pipe_buffer *buffer)
     {
        LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) buffer);
    @@ -388,7 +390,7 @@ lp_setup_set_fs_constants(struct setup_context *setup,
     
     
     void
    -lp_setup_set_alpha_ref_value( struct setup_context *setup,
    +lp_setup_set_alpha_ref_value( struct lp_setup_context *setup,
                                   float alpha_ref_value )
     {
        LP_DBG(DEBUG_SETUP, "%s %f\n", __FUNCTION__, alpha_ref_value);
    @@ -400,7 +402,7 @@ lp_setup_set_alpha_ref_value( struct setup_context *setup,
     }
     
     void
    -lp_setup_set_blend_color( struct setup_context *setup,
    +lp_setup_set_blend_color( struct lp_setup_context *setup,
                               const struct pipe_blend_color *blend_color )
     {
        LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
    @@ -415,7 +417,7 @@ lp_setup_set_blend_color( struct setup_context *setup,
     
     
     void
    -lp_setup_set_scissor( struct setup_context *setup,
    +lp_setup_set_scissor( struct lp_setup_context *setup,
                           const struct pipe_scissor_state *scissor )
     {
        LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
    @@ -430,7 +432,7 @@ lp_setup_set_scissor( struct setup_context *setup,
     
     
     void 
    -lp_setup_set_flatshade_first( struct setup_context *setup,
    +lp_setup_set_flatshade_first( struct lp_setup_context *setup,
                                   boolean flatshade_first )
     {
        setup->flatshade_first = flatshade_first;
    @@ -438,7 +440,7 @@ lp_setup_set_flatshade_first( struct setup_context *setup,
     
     
     void 
    -lp_setup_set_vertex_info( struct setup_context *setup,
    +lp_setup_set_vertex_info( struct lp_setup_context *setup,
                               struct vertex_info *vertex_info )
     {
        /* XXX: just silently holding onto the pointer:
    @@ -451,7 +453,7 @@ lp_setup_set_vertex_info( struct setup_context *setup,
      * Called during state validation when LP_NEW_SAMPLER_VIEW is set.
      */
     void
    -lp_setup_set_fragment_sampler_views(struct setup_context *setup,
    +lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
                                         unsigned num,
                                         struct pipe_sampler_view **views)
     {
    @@ -473,13 +475,13 @@ lp_setup_set_fragment_sampler_views(struct setup_context *setup,
              jit_tex->height = tex->height0;
              jit_tex->depth = tex->depth0;
              jit_tex->last_level = tex->last_level;
    -         jit_tex->stride = lp_tex->stride[0];
              if (!lp_tex->dt) {
                 /* regular texture - setup array of mipmap level pointers */
                 int j;
    -            for (j = 0; j < LP_MAX_TEXTURE_2D_LEVELS; j++) {
    +            for (j = 0; j <= tex->last_level; j++) {
                    jit_tex->data[j] =
                       (ubyte *) lp_tex->data + lp_tex->level_offset[j];
    +               jit_tex->row_stride[j] = lp_tex->stride[j];
                 }
              }
              else {
    @@ -492,6 +494,7 @@ lp_setup_set_fragment_sampler_views(struct setup_context *setup,
                 struct sw_winsys *winsys = screen->winsys;
                 jit_tex->data[0] = winsys->displaytarget_map(winsys, lp_tex->dt,
                                                           PIPE_BUFFER_USAGE_CPU_READ);
    +            jit_tex->row_stride[0] = lp_tex->stride[0];
                 assert(jit_tex->data[0]);
              }
     
    @@ -513,7 +516,7 @@ lp_setup_set_fragment_sampler_views(struct setup_context *setup,
      * being rendered and the current scene being built.
      */
     unsigned
    -lp_setup_is_texture_referenced( const struct setup_context *setup,
    +lp_setup_is_texture_referenced( const struct lp_setup_context *setup,
                                     const struct pipe_texture *texture )
     {
        unsigned i;
    @@ -542,7 +545,7 @@ lp_setup_is_texture_referenced( const struct setup_context *setup,
      * Called by vbuf code when we're about to draw something.
      */
     void
    -lp_setup_update_state( struct setup_context *setup )
    +lp_setup_update_state( struct lp_setup_context *setup )
     {
        struct lp_scene *scene = lp_setup_get_current_scene(setup);
     
    @@ -660,7 +663,7 @@ lp_setup_update_state( struct setup_context *setup )
     /* Only caller is lp_setup_vbuf_destroy()
      */
     void 
    -lp_setup_destroy( struct setup_context *setup )
    +lp_setup_destroy( struct lp_setup_context *setup )
     {
        reset_context( setup );
     
    @@ -685,12 +688,12 @@ lp_setup_destroy( struct setup_context *setup )
      * the draw module.  Currently also creates a rasterizer to use with
      * it.
      */
    -struct setup_context *
    +struct lp_setup_context *
     lp_setup_create( struct pipe_context *pipe,
                      struct draw_context *draw )
     {
        unsigned i;
    -   struct setup_context *setup = CALLOC_STRUCT(setup_context);
    +   struct lp_setup_context *setup = CALLOC_STRUCT(lp_setup_context);
     
        if (!setup)
           return NULL;
    diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h
    index 72116b8c6c4..414eaec98d1 100644
    --- a/src/gallium/drivers/llvmpipe/lp_setup.h
    +++ b/src/gallium/drivers/llvmpipe/lp_setup.h
    @@ -61,79 +61,80 @@ struct pipe_framebuffer_state;
     struct lp_fragment_shader;
     struct lp_jit_context;
     
    -struct setup_context *
    +struct lp_setup_context *
     lp_setup_create( struct pipe_context *pipe,
                      struct draw_context *draw );
     
     void
    -lp_setup_clear(struct setup_context *setup,
    +lp_setup_clear(struct lp_setup_context *setup,
                    const float *clear_color,
                    double clear_depth,
                    unsigned clear_stencil,
                    unsigned flags);
     
     struct pipe_fence_handle *
    -lp_setup_fence( struct setup_context *setup );
    +lp_setup_fence( struct lp_setup_context *setup );
     
     
     void
    -lp_setup_flush( struct setup_context *setup,
    +lp_setup_flush( struct lp_setup_context *setup,
                     unsigned flags );
     
     
     void
    -lp_setup_bind_framebuffer( struct setup_context *setup,
    +lp_setup_bind_framebuffer( struct lp_setup_context *setup,
                                const struct pipe_framebuffer_state *fb );
     
     void 
    -lp_setup_set_triangle_state( struct setup_context *setup,
    +lp_setup_set_triangle_state( struct lp_setup_context *setup,
                                  unsigned cullmode,
                                  boolean front_is_ccw,
    -                             boolean scissor );
    +                             boolean scissor,
    +                             boolean gl_rasterization_rules );
     
     void
    -lp_setup_set_fs_inputs( struct setup_context *setup,
    +lp_setup_set_fs_inputs( struct lp_setup_context *setup,
                             const struct lp_shader_input *interp,
                             unsigned nr );
     
     void
    -lp_setup_set_fs_functions( struct setup_context *setup,
    +lp_setup_set_fs_functions( struct lp_setup_context *setup,
                                lp_jit_frag_func jit_function0,
                                lp_jit_frag_func jit_function1,
                                boolean opaque );
     
     void
    -lp_setup_set_fs_constants(struct setup_context *setup,
    +lp_setup_set_fs_constants(struct lp_setup_context *setup,
                               struct pipe_buffer *buffer);
     
     
     void
    -lp_setup_set_alpha_ref_value( struct setup_context *setup,
    +lp_setup_set_alpha_ref_value( struct lp_setup_context *setup,
                                   float alpha_ref_value );
     
     void
    -lp_setup_set_blend_color( struct setup_context *setup,
    +lp_setup_set_blend_color( struct lp_setup_context *setup,
                               const struct pipe_blend_color *blend_color );
     
     void
    -lp_setup_set_scissor( struct setup_context *setup,
    +lp_setup_set_scissor( struct lp_setup_context *setup,
                           const struct pipe_scissor_state *scissor );
     
     void
    -lp_setup_set_fragment_sampler_views(struct setup_context *setup,
    +lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
                                         unsigned num,
                                         struct pipe_sampler_view **views);
     
     unsigned
    -lp_setup_is_texture_referenced( const struct setup_context *setup,
    +lp_setup_is_texture_referenced( const struct lp_setup_context *setup,
                                     const struct pipe_texture *texture );
     
     void
    -lp_setup_set_flatshade_first( struct setup_context *setup, 
    +lp_setup_set_flatshade_first( struct lp_setup_context *setup, 
                                   boolean flatshade_first );
     
     void
    -lp_setup_set_vertex_info( struct setup_context *setup, 
    +lp_setup_set_vertex_info( struct lp_setup_context *setup, 
                               struct vertex_info *info );
     
     
    diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
    index a5fc34e54a2..464fb369840 100644
    --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
    +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
    @@ -65,7 +65,7 @@ struct lp_scene_queue;
      * Subclass of vbuf_render, plugged directly into the draw module as
      * the rendering backend.
      */
    -struct setup_context
    +struct lp_setup_context
     {
        struct vbuf_render base;
     
    @@ -89,6 +89,7 @@ struct setup_context
        boolean ccw_is_frontface;
        boolean scissor_test;
        unsigned cullmode;
    +   float pixel_offset;
     
        struct pipe_framebuffer_state fb;
     
    @@ -131,29 +132,29 @@ struct setup_context
     
        unsigned dirty;   /**< bitmask of LP_SETUP_NEW_x bits */
     
    -   void (*point)( struct setup_context *,
    +   void (*point)( struct lp_setup_context *,
                       const float (*v0)[4]);
     
    -   void (*line)( struct setup_context *,
    +   void (*line)( struct lp_setup_context *,
                      const float (*v0)[4],
                      const float (*v1)[4]);
     
    -   void (*triangle)( struct setup_context *,
    +   void (*triangle)( struct lp_setup_context *,
                          const float (*v0)[4],
                          const float (*v1)[4],
                          const float (*v2)[4]);
     };
     
    -void lp_setup_choose_triangle( struct setup_context *setup );
    -void lp_setup_choose_line( struct setup_context *setup );
    -void lp_setup_choose_point( struct setup_context *setup );
    +void lp_setup_choose_triangle( struct lp_setup_context *setup );
    +void lp_setup_choose_line( struct lp_setup_context *setup );
    +void lp_setup_choose_point( struct lp_setup_context *setup );
     
    -struct lp_scene *lp_setup_get_current_scene(struct setup_context *setup);
    +struct lp_scene *lp_setup_get_current_scene(struct lp_setup_context *setup);
     
    -void lp_setup_init_vbuf(struct setup_context *setup);
    +void lp_setup_init_vbuf(struct lp_setup_context *setup);
     
    -void lp_setup_update_state( struct setup_context *setup );
    +void lp_setup_update_state( struct lp_setup_context *setup );
     
    -void lp_setup_destroy( struct setup_context *setup );
    +void lp_setup_destroy( struct lp_setup_context *setup );
     
     #endif
    diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c
    index feea79d3943..be41c44e6f5 100644
    --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c
    +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c
    @@ -31,7 +31,7 @@
     
     #include "lp_setup_context.h"
     
    -static void line_nop( struct setup_context *setup,
    +static void line_nop( struct lp_setup_context *setup,
                           const float (*v0)[4],
                           const float (*v1)[4] )
     {
    @@ -39,7 +39,7 @@ static void line_nop( struct setup_context *setup,
     
     
     void 
    -lp_setup_choose_line( struct setup_context *setup )
    +lp_setup_choose_line( struct lp_setup_context *setup )
     {
        setup->line = line_nop;
     }
    diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c
    index f03ca729b24..9f69e6c5ce2 100644
    --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c
    +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c
    @@ -31,14 +31,14 @@
     
     #include "lp_setup_context.h"
     
    -static void point_nop( struct setup_context *setup,
    +static void point_nop( struct lp_setup_context *setup,
                            const float (*v0)[4] )
     {
     }
     
     
     void 
    -lp_setup_choose_point( struct setup_context *setup )
    +lp_setup_choose_point( struct lp_setup_context *setup )
     {
        setup->point = point_nop;
     }
    diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
    index e75412ac9aa..ac6264dc73e 100644
    --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
    +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
    @@ -41,7 +41,8 @@
     /**
      * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
      */
    -static void constant_coef( struct lp_rast_triangle *tri,
    +static void constant_coef( struct lp_setup_context *setup,
    +                           struct lp_rast_triangle *tri,
                                unsigned slot,
     			   const float value,
                                unsigned i )
    @@ -56,7 +57,8 @@ static void constant_coef( struct lp_rast_triangle *tri,
      * Compute a0, dadx and dady for a linearly interpolated coefficient,
      * for a triangle.
      */
    -static void linear_coef( struct lp_rast_triangle *tri,
    +static void linear_coef( struct lp_setup_context *setup,
    +                         struct lp_rast_triangle *tri,
                              float oneoverarea,
                              unsigned slot,
                              const float (*v1)[4],
    @@ -90,8 +92,8 @@ static void linear_coef( struct lp_rast_triangle *tri,
         * instead - i'll switch to this later.
         */
        tri->inputs.a0[slot][i] = (a1 -
    -                              (dadx * (v1[0][0] - 0.5f) +
    -                               dady * (v1[0][1] - 0.5f)));
    +                              (dadx * (v1[0][0] - setup->pixel_offset) +
    +                               dady * (v1[0][1] - setup->pixel_offset)));
     }
     
     
    @@ -103,7 +105,8 @@ static void linear_coef( struct lp_rast_triangle *tri,
      * Later, when we compute the value at a particular fragment position we'll
      * divide the interpolated value by the interpolated W at that fragment.
      */
    -static void perspective_coef( struct lp_rast_triangle *tri,
    +static void perspective_coef( struct lp_setup_context *setup,
    +                              struct lp_rast_triangle *tri,
                                   float oneoverarea,
                                   unsigned slot,
     			      const float (*v1)[4],
    @@ -125,8 +128,8 @@ static void perspective_coef( struct lp_rast_triangle *tri,
        tri->inputs.dadx[slot][i] = dadx;
        tri->inputs.dady[slot][i] = dady;
        tri->inputs.a0[slot][i] = (a1 -
    -                              (dadx * (v1[0][0] - 0.5f) +
    -                               dady * (v1[0][1] - 0.5f)));
    +                              (dadx * (v1[0][0] - setup->pixel_offset) +
    +                               dady * (v1[0][1] - setup->pixel_offset)));
     }
     
     
    @@ -137,7 +140,8 @@ static void perspective_coef( struct lp_rast_triangle *tri,
      * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
      */
     static void
    -setup_fragcoord_coef(struct lp_rast_triangle *tri,
    +setup_fragcoord_coef(struct lp_setup_context *setup,
    +                     struct lp_rast_triangle *tri,
                          float oneoverarea,
                          unsigned slot,
                          const float (*v1)[4],
    @@ -153,27 +157,28 @@ setup_fragcoord_coef(struct lp_rast_triangle *tri,
        tri->inputs.dadx[slot][1] = 0.0;
        tri->inputs.dady[slot][1] = 1.0;
        /*Z*/
    -   linear_coef(tri, oneoverarea, slot, v1, v2, v3, 0, 2);
    +   linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 2);
        /*W*/
    -   linear_coef(tri, oneoverarea, slot, v1, v2, v3, 0, 3);
    +   linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 3);
     }
     
     
    -static void setup_facing_coef( struct lp_rast_triangle *tri,
    +static void setup_facing_coef( struct lp_setup_context *setup,
    +                               struct lp_rast_triangle *tri,
                                    unsigned slot,
                                    boolean frontface )
     {
    -   constant_coef( tri, slot, 1.0f - frontface, 0 );
    -   constant_coef( tri, slot, 0.0f, 1 ); /* wasted */
    -   constant_coef( tri, slot, 0.0f, 2 ); /* wasted */
    -   constant_coef( tri, slot, 0.0f, 3 ); /* wasted */
    +   constant_coef( setup, tri, slot, 1.0f - frontface, 0 );
    +   constant_coef( setup, tri, slot, 0.0f, 1 ); /* wasted */
    +   constant_coef( setup, tri, slot, 0.0f, 2 ); /* wasted */
    +   constant_coef( setup, tri, slot, 0.0f, 3 ); /* wasted */
     }
     
     
     /**
      * Compute the tri->coef[] array dadx, dady, a0 values.
      */
    -static void setup_tri_coefficients( struct setup_context *setup,
    +static void setup_tri_coefficients( struct lp_setup_context *setup,
     				    struct lp_rast_triangle *tri,
                                         float oneoverarea,
     				    const float (*v1)[4],
    @@ -185,7 +190,7 @@ static void setup_tri_coefficients( struct setup_context *setup,
     
        /* The internal position input is in slot zero:
         */
    -   setup_fragcoord_coef(tri, oneoverarea, 0, v1, v2, v3);
    +   setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, v3);
     
        /* setup interpolation for all the remaining attributes:
         */
    @@ -196,27 +201,27 @@ static void setup_tri_coefficients( struct setup_context *setup,
           switch (setup->fs.input[slot].interp) {
           case LP_INTERP_CONSTANT:
              for (i = 0; i < NUM_CHANNELS; i++)
    -            constant_coef(tri, slot+1, v3[vert_attr][i], i);
    +            constant_coef(setup, tri, slot+1, v3[vert_attr][i], i);
              break;
     
           case LP_INTERP_LINEAR:
              for (i = 0; i < NUM_CHANNELS; i++)
    -            linear_coef(tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i);
    +            linear_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i);
              break;
     
           case LP_INTERP_PERSPECTIVE:
              for (i = 0; i < NUM_CHANNELS; i++)
    -            perspective_coef(tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i);
    +            perspective_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i);
              break;
     
           case LP_INTERP_POSITION:
              /* XXX: fix me - duplicates the values in slot zero.
               */
    -         setup_fragcoord_coef(tri, oneoverarea, slot+1, v1, v2, v3);
    +         setup_fragcoord_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3);
              break;
     
           case LP_INTERP_FACING:
    -         setup_facing_coef(tri, slot+1, frontface);
    +         setup_facing_coef(setup, tri, slot+1, frontface);
              break;
     
           default:
    @@ -274,19 +279,19 @@ alloc_triangle(struct lp_scene *scene, unsigned nr_inputs, unsigned *tri_size)
      * bins for the tiles which we overlap.
      */
     static void 
    -do_triangle_ccw(struct setup_context *setup,
    +do_triangle_ccw(struct lp_setup_context *setup,
     		const float (*v1)[4],
     		const float (*v2)[4],
     		const float (*v3)[4],
     		boolean frontfacing )
     {
        /* x/y positions in fixed point */
    -   const int x1 = subpixel_snap(v1[0][0]);
    -   const int x2 = subpixel_snap(v2[0][0]);
    -   const int x3 = subpixel_snap(v3[0][0]);
    -   const int y1 = subpixel_snap(v1[0][1]);
    -   const int y2 = subpixel_snap(v2[0][1]);
    -   const int y3 = subpixel_snap(v3[0][1]);
    +   const int x1 = subpixel_snap(v1[0][0] + 0.5 - setup->pixel_offset);
    +   const int x2 = subpixel_snap(v2[0][0] + 0.5 - setup->pixel_offset);
    +   const int x3 = subpixel_snap(v3[0][0] + 0.5 - setup->pixel_offset);
    +   const int y1 = subpixel_snap(v1[0][1] + 0.5 - setup->pixel_offset);
    +   const int y2 = subpixel_snap(v2[0][1] + 0.5 - setup->pixel_offset);
    +   const int y3 = subpixel_snap(v3[0][1] + 0.5 - setup->pixel_offset);
     
        struct lp_scene *scene = lp_setup_get_current_scene(setup);
        struct lp_rast_triangle *tri;
    @@ -565,7 +570,7 @@ do_triangle_ccw(struct setup_context *setup,
     }
     
     
    -static void triangle_cw( struct setup_context *setup,
    +static void triangle_cw( struct lp_setup_context *setup,
     			 const float (*v0)[4],
     			 const float (*v1)[4],
     			 const float (*v2)[4] )
    @@ -574,7 +579,7 @@ static void triangle_cw( struct setup_context *setup,
     }
     
     
    -static void triangle_ccw( struct setup_context *setup,
    +static void triangle_ccw( struct lp_setup_context *setup,
     			 const float (*v0)[4],
     			 const float (*v1)[4],
     			 const float (*v2)[4] )
    @@ -583,7 +588,7 @@ static void triangle_ccw( struct setup_context *setup,
     }
     
     
    -static void triangle_both( struct setup_context *setup,
    +static void triangle_both( struct lp_setup_context *setup,
     			   const float (*v0)[4],
     			   const float (*v1)[4],
     			   const float (*v2)[4] )
    @@ -602,7 +607,7 @@ static void triangle_both( struct setup_context *setup,
     }
     
     
    -static void triangle_nop( struct setup_context *setup,
    +static void triangle_nop( struct lp_setup_context *setup,
     			  const float (*v0)[4],
     			  const float (*v1)[4],
     			  const float (*v2)[4] )
    @@ -611,7 +616,7 @@ static void triangle_nop( struct setup_context *setup,
     
     
     void 
    -lp_setup_choose_triangle( struct setup_context *setup )
    +lp_setup_choose_triangle( struct lp_setup_context *setup )
     {
        switch (setup->cullmode) {
        case PIPE_WINDING_NONE:
    diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
    index 671e74465c0..d7336d82b21 100644
    --- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
    +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
    @@ -48,10 +48,10 @@
       
     
     /** cast wrapper */
    -static struct setup_context *
    -setup_context(struct vbuf_render *vbr)
    +static struct lp_setup_context *
    +lp_setup_context(struct vbuf_render *vbr)
     {
    -   return (struct setup_context *) vbr;
    +   return (struct lp_setup_context *) vbr;
     }
     
     
    @@ -59,7 +59,7 @@ setup_context(struct vbuf_render *vbr)
     static const struct vertex_info *
     lp_setup_get_vertex_info(struct vbuf_render *vbr)
     {
    -   struct setup_context *setup = setup_context(vbr);
    +   struct lp_setup_context *setup = lp_setup_context(vbr);
        return setup->vertex_info;
     }
     
    @@ -68,7 +68,7 @@ static boolean
     lp_setup_allocate_vertices(struct vbuf_render *vbr,
                               ushort vertex_size, ushort nr_vertices)
     {
    -   struct setup_context *setup = setup_context(vbr);
    +   struct lp_setup_context *setup = lp_setup_context(vbr);
        unsigned size = vertex_size * nr_vertices;
     
        if (setup->vertex_buffer_size < size) {
    @@ -92,7 +92,7 @@ lp_setup_release_vertices(struct vbuf_render *vbr)
     static void *
     lp_setup_map_vertices(struct vbuf_render *vbr)
     {
    -   struct setup_context *setup = setup_context(vbr);
    +   struct lp_setup_context *setup = lp_setup_context(vbr);
        return setup->vertex_buffer;
     }
     
    @@ -101,7 +101,7 @@ lp_setup_unmap_vertices(struct vbuf_render *vbr,
                            ushort min_index,
                            ushort max_index )
     {
    -   struct setup_context *setup = setup_context(vbr);
    +   struct lp_setup_context *setup = lp_setup_context(vbr);
        assert( setup->vertex_buffer_size >= (max_index+1) * setup->vertex_size );
        /* do nothing */
     }
    @@ -110,7 +110,7 @@ lp_setup_unmap_vertices(struct vbuf_render *vbr,
     static boolean
     lp_setup_set_primitive(struct vbuf_render *vbr, unsigned prim)
     {
    -   setup_context(vbr)->prim = prim;
    +   lp_setup_context(vbr)->prim = prim;
        return TRUE;
     }
     
    @@ -129,7 +129,7 @@ static INLINE const_float4_ptr get_vert( const void *vertex_buffer,
     static void
     lp_setup_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
     {
    -   struct setup_context *setup = setup_context(vbr);
    +   struct lp_setup_context *setup = lp_setup_context(vbr);
        const unsigned stride = setup->vertex_info->size * sizeof(float);
        const void *vertex_buffer = setup->vertex_buffer;
        unsigned i;
    @@ -284,7 +284,7 @@ lp_setup_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
     static void
     lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
     {
    -   struct setup_context *setup = setup_context(vbr);
    +   struct lp_setup_context *setup = lp_setup_context(vbr);
        const unsigned stride = setup->vertex_info->size * sizeof(float);
        const void *vertex_buffer =
           (void *) get_vert(setup->vertex_buffer, start, stride);
    @@ -436,7 +436,7 @@ lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
     static void
     lp_setup_vbuf_destroy(struct vbuf_render *vbr)
     {
    -   lp_setup_destroy(setup_context(vbr));
    +   lp_setup_destroy(lp_setup_context(vbr));
     }
     
     
    @@ -444,7 +444,7 @@ lp_setup_vbuf_destroy(struct vbuf_render *vbr)
      * Create the post-transform vertex handler for the given context.
      */
     void
    -lp_setup_init_vbuf(struct setup_context *setup)
    +lp_setup_init_vbuf(struct lp_setup_context *setup)
     {
        setup->base.max_indices = LP_MAX_VBUF_INDEXES;
        setup->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE;
    diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
    index 9b87cd202e7..13cd934b13f 100644
    --- a/src/gallium/drivers/llvmpipe/lp_state.h
    +++ b/src/gallium/drivers/llvmpipe/lp_state.h
    @@ -31,7 +31,7 @@
     #ifndef LP_STATE_H
     #define LP_STATE_H
     
    -#include <llvm-c/Core.h>
    +#include "os/os_llvm.h"
     
     #include "pipe/p_state.h"
     #include "tgsi/tgsi_scan.h"
    diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
    index 3e681b561aa..b75ca282067 100644
    --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
    +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
    @@ -40,7 +40,7 @@
      * - depth/stencil test (stencil TBI)
      * - blending
      *
    - * This file has only the glue to assembly the fragment pipeline.  The actual
    + * This file has only the glue to assemble the fragment pipeline.  The actual
      * plumbing of converting Gallium state into LLVM IR is done elsewhere, in the
      * lp_bld_*.[ch] files, and in a complete generic and reusable way. Here we
      * muster the LLVM JIT execution engine to create a function that follows an
    @@ -95,6 +95,9 @@
     #include "lp_tex_sample.h"
     
     
    +#include <llvm-c/Analysis.h>
    +
    +
     static const unsigned char quad_offset_x[4] = {0, 1, 0, 1};
     static const unsigned char quad_offset_y[4] = {0, 0, 1, 1};
     
    diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
    index feb012816c9..6df3ef25b0e 100644
    --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
    +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
    @@ -62,7 +62,8 @@ void llvmpipe_bind_rasterizer_state(struct pipe_context *pipe,
           lp_setup_set_triangle_state( llvmpipe->setup,
                        llvmpipe->rasterizer->cull_mode,
                        llvmpipe->rasterizer->front_winding == PIPE_WINDING_CCW,
    -                   llvmpipe->rasterizer->scissor);
    +                   llvmpipe->rasterizer->scissor,
    +                   llvmpipe->rasterizer->gl_rasterization_rules);
        }
     
        llvmpipe->dirty |= LP_NEW_RASTERIZER;
    diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h
    index a9b99945f92..1df98978988 100644
    --- a/src/gallium/drivers/llvmpipe/lp_test.h
    +++ b/src/gallium/drivers/llvmpipe/lp_test.h
    @@ -41,7 +41,7 @@
     #include 
     #include 
     
    -#include <llvm-c/Core.h>
    +#include "os/os_llvm.h"
     #include 
     #include 
     #include 
    diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
    index d05157991bb..2c4d7fb6e14 100644
    --- a/src/gallium/drivers/llvmpipe/lp_test_format.c
    +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
    @@ -29,7 +29,7 @@
     #include 
     #include 
     
    -#include <llvm-c/Core.h>
    +#include "os/os_llvm.h"
     #include 
     #include 
     #include 
    diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
    index cb59a94464a..799df182b6a 100644
    --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h
    +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
    @@ -29,7 +29,7 @@
     #define LP_TEX_SAMPLE_H
     
     
    -#include <llvm-c/Core.h>
    +#include "os/os_llvm.h"
     
     
     struct lp_sampler_static_state;
    diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c
    index 5a3cf37d6d8..662508af61a 100644
    --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c
    +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c
    @@ -147,7 +147,7 @@ LP_LLVM_TEXTURE_MEMBER(width,      LP_JIT_TEXTURE_WIDTH, TRUE)
     LP_LLVM_TEXTURE_MEMBER(height,     LP_JIT_TEXTURE_HEIGHT, TRUE)
     LP_LLVM_TEXTURE_MEMBER(depth,      LP_JIT_TEXTURE_DEPTH, TRUE)
     LP_LLVM_TEXTURE_MEMBER(last_level, LP_JIT_TEXTURE_LAST_LEVEL, TRUE)
    -LP_LLVM_TEXTURE_MEMBER(stride,     LP_JIT_TEXTURE_STRIDE, TRUE)
    +LP_LLVM_TEXTURE_MEMBER(row_stride, LP_JIT_TEXTURE_ROW_STRIDE, FALSE)
     LP_LLVM_TEXTURE_MEMBER(data_ptr,   LP_JIT_TEXTURE_DATA, FALSE)
     
     
    @@ -204,7 +204,7 @@ lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state,
        sampler->dynamic_state.base.height = lp_llvm_texture_height;
        sampler->dynamic_state.base.depth = lp_llvm_texture_depth;
        sampler->dynamic_state.base.last_level = lp_llvm_texture_last_level;
    -   sampler->dynamic_state.base.stride = lp_llvm_texture_stride;
    +   sampler->dynamic_state.base.row_stride = lp_llvm_texture_row_stride;
        sampler->dynamic_state.base.data_ptr = lp_llvm_texture_data_ptr;
        sampler->dynamic_state.static_state = static_state;
        sampler->dynamic_state.context_ptr = context_ptr;
    diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
    index 74b7b4ec5e1..f2c6dbd088a 100644
    --- a/src/gallium/drivers/llvmpipe/lp_texture.c
    +++ b/src/gallium/drivers/llvmpipe/lp_texture.c
    @@ -243,7 +243,7 @@ llvmpipe_tex_surface_destroy(struct pipe_surface *surf)
     
     
     static struct pipe_transfer *
    -llvmpipe_get_tex_transfer(struct pipe_screen *screen,
    +llvmpipe_get_tex_transfer(struct pipe_context *pipe,
                               struct pipe_texture *texture,
                               unsigned face, unsigned level, unsigned zslice,
                               enum pipe_transfer_usage usage,
    @@ -294,7 +294,8 @@ llvmpipe_get_tex_transfer(struct pipe_screen *screen,
     
     
     static void 
    -llvmpipe_tex_transfer_destroy(struct pipe_transfer *transfer)
    +llvmpipe_tex_transfer_destroy(struct pipe_context *pipe,
    +                              struct pipe_transfer *transfer)
     {
        /* Effectively do the texture_update work here - if texture images
         * needed post-processing to put them into hardware layout, this is
    @@ -307,10 +308,10 @@ llvmpipe_tex_transfer_destroy(struct pipe_transfer *transfer)
     
     
     static void *
    -llvmpipe_transfer_map( struct pipe_screen *_screen,
    +llvmpipe_transfer_map( struct pipe_context *pipe,
                            struct pipe_transfer *transfer )
     {
    -   struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
    +   struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
        ubyte *map, *xfer_map;
        struct llvmpipe_texture *lpt;
        enum pipe_format format;
    @@ -351,10 +352,10 @@ llvmpipe_transfer_map( struct pipe_screen *_screen,
     
     
     static void
    -llvmpipe_transfer_unmap(struct pipe_screen *screen,
    -                       struct pipe_transfer *transfer)
    +llvmpipe_transfer_unmap(struct pipe_context *pipe,
    +                        struct pipe_transfer *transfer)
     {
    -   struct llvmpipe_screen *lp_screen = llvmpipe_screen(screen);
    +   struct llvmpipe_screen *lp_screen = llvmpipe_screen(pipe->screen);
        struct llvmpipe_texture *lpt;
     
        assert(transfer->texture);
    @@ -376,9 +377,14 @@ llvmpipe_init_screen_texture_funcs(struct pipe_screen *screen)
     
        screen->get_tex_surface = llvmpipe_get_tex_surface;
        screen->tex_surface_destroy = llvmpipe_tex_surface_destroy;
    -
    -   screen->get_tex_transfer = llvmpipe_get_tex_transfer;
    -   screen->tex_transfer_destroy = llvmpipe_tex_transfer_destroy;
    -   screen->transfer_map = llvmpipe_transfer_map;
    -   screen->transfer_unmap = llvmpipe_transfer_unmap;
    +}
    +
    +
    +void
    +llvmpipe_init_context_texture_funcs(struct pipe_context *pipe)
    +{
    +   pipe->get_tex_transfer = llvmpipe_get_tex_transfer;
    +   pipe->tex_transfer_destroy = llvmpipe_tex_transfer_destroy;
    +   pipe->transfer_map = llvmpipe_transfer_map;
    +   pipe->transfer_unmap = llvmpipe_transfer_unmap;
     }
    diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h
    index b23f929b167..94b667abf31 100644
    --- a/src/gallium/drivers/llvmpipe/lp_texture.h
    +++ b/src/gallium/drivers/llvmpipe/lp_texture.h
    @@ -98,5 +98,7 @@ llvmpipe_transfer(struct pipe_transfer *pt)
     extern void
     llvmpipe_init_screen_texture_funcs(struct pipe_screen *screen);
     
    +extern void
    +llvmpipe_init_context_texture_funcs(struct pipe_context *pipe);
     
     #endif /* LP_TEXTURE_H */
    diff --git a/src/gallium/drivers/nouveau/nv04_surface_2d.c b/src/gallium/drivers/nouveau/nv04_surface_2d.c
    index b074547c4da..93114465d5e 100644
    --- a/src/gallium/drivers/nouveau/nv04_surface_2d.c
    +++ b/src/gallium/drivers/nouveau/nv04_surface_2d.c
    @@ -518,7 +518,6 @@ nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d
     		ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_GPU_READ;
     	}
     
    -	struct nv40_screen* screen = (struct nv40_screen*)pscreen;
     	ns->base.usage = PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE;
     
     	struct pipe_texture templ;
    diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c
    index 279b74445ca..825c167b01b 100644
    --- a/src/gallium/drivers/nv30/nv30_context.c
    +++ b/src/gallium/drivers/nv30/nv30_context.c
    @@ -74,6 +74,7 @@ nv30_create(struct pipe_screen *pscreen, void *priv)
     	nv30_init_query_functions(nv30);
     	nv30_init_surface_functions(nv30);
     	nv30_init_state_functions(nv30);
    +	nv30_init_transfer_functions(nv30);
     
     	/* Create, configure, and install fallback swtnl path */
     	nv30->draw = draw_create();
    diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h
    index 34b5953ccf1..38a17a44552 100644
    --- a/src/gallium/drivers/nv30/nv30_context.h
    +++ b/src/gallium/drivers/nv30/nv30_context.h
    @@ -168,6 +168,7 @@ struct nv30_state_entry {
     extern void nv30_init_state_functions(struct nv30_context *nv30);
     extern void nv30_init_surface_functions(struct nv30_context *nv30);
     extern void nv30_init_query_functions(struct nv30_context *nv30);
    +extern void nv30_init_transfer_functions(struct nv30_context *nv30);
     
     extern void nv30_screen_init_miptree_functions(struct pipe_screen *pscreen);
     
    diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c
    index 85433d20953..db24335b7c1 100644
    --- a/src/gallium/drivers/nv30/nv30_screen.c
    +++ b/src/gallium/drivers/nv30/nv30_screen.c
    @@ -214,7 +214,6 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
     	pscreen->context_create = nv30_create;
     
     	nv30_screen_init_miptree_functions(pscreen);
    -	nv30_screen_init_transfer_functions(pscreen);
     
     	/* 3D object */
     	switch (dev->chipset & 0xf0) {
    diff --git a/src/gallium/drivers/nv30/nv30_screen.h b/src/gallium/drivers/nv30/nv30_screen.h
    index 8591cd31cab..b7856cdf005 100644
    --- a/src/gallium/drivers/nv30/nv30_screen.h
    +++ b/src/gallium/drivers/nv30/nv30_screen.h
    @@ -35,7 +35,4 @@ nv30_screen(struct pipe_screen *screen)
     	return (struct nv30_screen *)screen;
     }
     
    -void
    -nv30_screen_init_transfer_functions(struct pipe_screen *pscreen);
    -
     #endif
    diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c
    index 3aeda51ea19..cfc109bb740 100644
    --- a/src/gallium/drivers/nv30/nv30_transfer.c
    +++ b/src/gallium/drivers/nv30/nv30_transfer.c
    @@ -33,11 +33,12 @@ nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned h
     }
     
     static struct pipe_transfer *
    -nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
    +nv30_transfer_new(struct pipe_context *pcontext, struct pipe_texture *pt,
     		  unsigned face, unsigned level, unsigned zslice,
     		  enum pipe_transfer_usage usage,
     		  unsigned x, unsigned y, unsigned w, unsigned h)
     {
    +        struct pipe_screen *pscreen = pcontext->screen;
     	struct nv30_miptree *mt = (struct nv30_miptree *)pt;
     	struct nv30_transfer *tx;
     	struct pipe_texture tx_tex_template, *tx_tex;
    @@ -117,12 +118,13 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
     }
     
     static void
    -nv30_transfer_del(struct pipe_transfer *ptx)
    +nv30_transfer_del(struct pipe_context *pcontext,
    +                  struct pipe_transfer *ptx)
     {
     	struct nv30_transfer *tx = (struct nv30_transfer *)ptx;
     
     	if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) {
    -		struct pipe_screen *pscreen = ptx->texture->screen;
    +		struct pipe_screen *pscreen = pcontext->screen;
     		struct nv30_screen *nvscreen = nv30_screen(pscreen);
     		struct pipe_surface *dst;
     
    @@ -145,8 +147,9 @@ nv30_transfer_del(struct pipe_transfer *ptx)
     }
     
     static void *
    -nv30_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
    +nv30_transfer_map(struct pipe_context *pcontext, struct pipe_transfer *ptx)
     {
    +        struct pipe_screen *pscreen = pcontext->screen;
     	struct nv30_transfer *tx = (struct nv30_transfer *)ptx;
     	struct nv04_surface *ns = (struct nv04_surface *)tx->surface;
     	struct nv30_miptree *mt = (struct nv30_miptree *)tx->surface->texture;
    @@ -160,8 +163,9 @@ nv30_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
     }
     
     static void
    -nv30_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
    +nv30_transfer_unmap(struct pipe_context *pcontext, struct pipe_transfer *ptx)
     {
    +        struct pipe_screen *pscreen = pcontext->screen;
     	struct nv30_transfer *tx = (struct nv30_transfer *)ptx;
     	struct nv30_miptree *mt = (struct nv30_miptree *)tx->surface->texture;
     
    @@ -169,10 +173,10 @@ nv30_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
     }
     
     void
    -nv30_screen_init_transfer_functions(struct pipe_screen *pscreen)
    +nv30_init_transfer_functions(struct nv30_context *nv30)
     {
    -	pscreen->get_tex_transfer = nv30_transfer_new;
    -	pscreen->tex_transfer_destroy = nv30_transfer_del;
    -	pscreen->transfer_map = nv30_transfer_map;
    -	pscreen->transfer_unmap = nv30_transfer_unmap;
    +	nv30->pipe.get_tex_transfer = nv30_transfer_new;
    +	nv30->pipe.tex_transfer_destroy = nv30_transfer_del;
    +	nv30->pipe.transfer_map = nv30_transfer_map;
    +	nv30->pipe.transfer_unmap = nv30_transfer_unmap;
     }
    diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c
    index 65dc73e88b3..e828f17643b 100644
    --- a/src/gallium/drivers/nv40/nv40_context.c
    +++ b/src/gallium/drivers/nv40/nv40_context.c
    @@ -74,6 +74,7 @@ nv40_create(struct pipe_screen *pscreen, void *priv)
     	nv40_init_query_functions(nv40);
     	nv40_init_surface_functions(nv40);
     	nv40_init_state_functions(nv40);
    +	nv40_init_transfer_functions(nv40);
     
     	/* Create, configure, and install fallback swtnl path */
     	nv40->draw = draw_create();
    diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h
    index 4d2ffd9772d..1e1d64ee498 100644
    --- a/src/gallium/drivers/nv40/nv40_context.h
    +++ b/src/gallium/drivers/nv40/nv40_context.h
    @@ -184,6 +184,7 @@ struct nv40_state_entry {
     extern void nv40_init_state_functions(struct nv40_context *nv40);
     extern void nv40_init_surface_functions(struct nv40_context *nv40);
     extern void nv40_init_query_functions(struct nv40_context *nv40);
    +extern void nv40_init_transfer_functions(struct nv40_context *nv40);
     
     extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen);
     
    diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c
    index b216c5e38c9..dbcc33d8d9e 100644
    --- a/src/gallium/drivers/nv40/nv40_screen.c
    +++ b/src/gallium/drivers/nv40/nv40_screen.c
    @@ -201,7 +201,6 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
     	pscreen->context_create = nv40_create;
     
     	nv40_screen_init_miptree_functions(pscreen);
    -	nv40_screen_init_transfer_functions(pscreen);
     
     	/* 3D object */
     	switch (dev->chipset & 0xf0) {
    diff --git a/src/gallium/drivers/nv40/nv40_screen.h b/src/gallium/drivers/nv40/nv40_screen.h
    index 9437aa050d4..2765ab764ae 100644
    --- a/src/gallium/drivers/nv40/nv40_screen.h
    +++ b/src/gallium/drivers/nv40/nv40_screen.h
    @@ -34,7 +34,4 @@ nv40_screen(struct pipe_screen *screen)
     	return (struct nv40_screen *)screen;
     }
     
    -void
    -nv40_screen_init_transfer_functions(struct pipe_screen *pscreen);
    -
     #endif
    diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c
    index 0462a042c38..c552a681138 100644
    --- a/src/gallium/drivers/nv40/nv40_transfer.c
    +++ b/src/gallium/drivers/nv40/nv40_transfer.c
    @@ -33,11 +33,12 @@ nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned h
     }
     
     static struct pipe_transfer *
    -nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
    +nv40_transfer_new(struct pipe_context *pcontext, struct pipe_texture *pt,
     		  unsigned face, unsigned level, unsigned zslice,
     		  enum pipe_transfer_usage usage,
     		  unsigned x, unsigned y, unsigned w, unsigned h)
     {
    +        struct pipe_screen *pscreen = pcontext->screen;
     	struct nv40_miptree *mt = (struct nv40_miptree *)pt;
     	struct nv40_transfer *tx;
     	struct pipe_texture tx_tex_template, *tx_tex;
    @@ -117,12 +118,12 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
     }
     
     static void
    -nv40_transfer_del(struct pipe_transfer *ptx)
    +nv40_transfer_del(struct pipe_context *pcontext, struct pipe_transfer *ptx)
     {
     	struct nv40_transfer *tx = (struct nv40_transfer *)ptx;
     
     	if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) {
    -		struct pipe_screen *pscreen = ptx->texture->screen;
    +		struct pipe_screen *pscreen = pcontext->screen;
     		struct nv40_screen *nvscreen = nv40_screen(pscreen);
     		struct pipe_surface *dst;
     
    @@ -145,8 +146,9 @@ nv40_transfer_del(struct pipe_transfer *ptx)
     }
     
     static void *
    -nv40_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
    +nv40_transfer_map(struct pipe_context *pcontext, struct pipe_transfer *ptx)
     {
    +        struct pipe_screen *pscreen = pcontext->screen;
     	struct nv40_transfer *tx = (struct nv40_transfer *)ptx;
     	struct nv04_surface *ns = (struct nv04_surface *)tx->surface;
     	struct nv40_miptree *mt = (struct nv40_miptree *)tx->surface->texture;
    @@ -160,8 +162,9 @@ nv40_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
     }
     
     static void
    -nv40_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
    +nv40_transfer_unmap(struct pipe_context *pcontext, struct pipe_transfer *ptx)
     {
    +        struct pipe_screen *pscreen = pcontext->screen;
     	struct nv40_transfer *tx = (struct nv40_transfer *)ptx;
     	struct nv40_miptree *mt = (struct nv40_miptree *)tx->surface->texture;
     
    @@ -169,10 +172,10 @@ nv40_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
     }
     
     void
    -nv40_screen_init_transfer_functions(struct pipe_screen *pscreen)
    +nv40_init_transfer_functions(struct nv40_context *nv40)
     {
    -	pscreen->get_tex_transfer = nv40_transfer_new;
    -	pscreen->tex_transfer_destroy = nv40_transfer_del;
    -	pscreen->transfer_map = nv40_transfer_map;
    -	pscreen->transfer_unmap = nv40_transfer_unmap;
    +	nv40->pipe.get_tex_transfer = nv40_transfer_new;
    +	nv40->pipe.tex_transfer_destroy = nv40_transfer_del;
    +	nv40->pipe.transfer_map = nv40_transfer_map;
    +	nv40->pipe.transfer_unmap = nv40_transfer_unmap;
     }
    diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
    index b289eef0fc2..c93c5d127c4 100644
    --- a/src/gallium/drivers/nv40/nv40_vertprog.c
    +++ b/src/gallium/drivers/nv40/nv40_vertprog.c
    @@ -742,7 +742,7 @@ nv40_vertprog_translate(struct nv40_context *nv40,
     	}
     
     	/* Redirect post-transform vertex position to a temp if user clip
    -	 * planes are enabled.  We need to append code the the vtxprog
    +	 * planes are enabled.  We need to append code to the vtxprog
     	 * to handle clip planes later.
     	 */
     	if (vp->ucp.nr)  {
    diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
    index 0eb42f323ff..aa14e17872d 100644
    --- a/src/gallium/drivers/nv50/nv50_context.c
    +++ b/src/gallium/drivers/nv50/nv50_context.c
    @@ -97,6 +97,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv)
     	nv50_init_surface_functions(nv50);
     	nv50_init_state_functions(nv50);
     	nv50_init_query_functions(nv50);
    +        nv50_init_transfer_functions(nv50);
     
     	nv50->draw = draw_create();
     	assert(nv50->draw);
    diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
    index f35bc411feb..bc7831d9aca 100644
    --- a/src/gallium/drivers/nv50/nv50_context.h
    +++ b/src/gallium/drivers/nv50/nv50_context.h
    @@ -186,6 +186,7 @@ nv50_context(struct pipe_context *pipe)
     extern void nv50_init_surface_functions(struct nv50_context *nv50);
     extern void nv50_init_state_functions(struct nv50_context *nv50);
     extern void nv50_init_query_functions(struct nv50_context *nv50);
    +extern void nv50_init_transfer_functions(struct nv50_context *nv50);
     
     extern void nv50_screen_init_miptree_functions(struct pipe_screen *pscreen);
     
    diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
    index 8283524b638..d7f5863fb71 100644
    --- a/src/gallium/drivers/nv50/nv50_screen.c
    +++ b/src/gallium/drivers/nv50/nv50_screen.c
    @@ -234,7 +234,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
     	pscreen->context_create = nv50_create;
     
     	nv50_screen_init_miptree_functions(pscreen);
    -	nv50_transfer_init_screen_functions(pscreen);
     
     	/* DMA engine object */
     	ret = nouveau_grobj_alloc(chan, 0xbeef5039,
    diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h
    index d1bc80cb9ed..ec19ea655b1 100644
    --- a/src/gallium/drivers/nv50/nv50_screen.h
    +++ b/src/gallium/drivers/nv50/nv50_screen.h
    @@ -38,6 +38,4 @@ nv50_screen(struct pipe_screen *screen)
     	return (struct nv50_screen *)screen;
     }
     
    -void nv50_transfer_init_screen_functions(struct pipe_screen *);
    -
     #endif
    diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
    index 7c360e9e73a..9eb223eca65 100644
    --- a/src/gallium/drivers/nv50/nv50_transfer.c
    +++ b/src/gallium/drivers/nv50/nv50_transfer.c
    @@ -121,11 +121,12 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
     }
     
     static struct pipe_transfer *
    -nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
    +nv50_transfer_new(struct pipe_context *pcontext, struct pipe_texture *pt,
     		  unsigned face, unsigned level, unsigned zslice,
     		  enum pipe_transfer_usage usage,
     		  unsigned x, unsigned y, unsigned w, unsigned h)
     {
    +        struct pipe_screen *pscreen = pcontext->screen;
     	struct nouveau_device *dev = nouveau_screen(pscreen)->device;
     	struct nv50_miptree *mt = nv50_miptree(pt);
     	struct nv50_miptree_level *lvl = &mt->level[level];
    @@ -186,7 +187,7 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
     }
     
     static void
    -nv50_transfer_del(struct pipe_transfer *ptx)
    +nv50_transfer_del(struct pipe_context *pcontext, struct pipe_transfer *ptx)
     {
     	struct nv50_transfer *tx = (struct nv50_transfer *)ptx;
     	struct nv50_miptree *mt = nv50_miptree(ptx->texture);
    @@ -196,7 +197,7 @@ nv50_transfer_del(struct pipe_transfer *ptx)
     	unsigned ny = util_format_get_nblocksy(pt->format, tx->base.height);
     
     	if (ptx->usage & PIPE_TRANSFER_WRITE) {
    -		struct pipe_screen *pscreen = pt->screen;
    +		struct pipe_screen *pscreen = pcontext->screen;
     
     		nv50_transfer_rect_m2mf(pscreen, tx->bo, 0,
     					tx->base.stride, tx->bo->tile_mode,
    @@ -218,7 +219,7 @@ nv50_transfer_del(struct pipe_transfer *ptx)
     }
     
     static void *
    -nv50_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
    +nv50_transfer_map(struct pipe_context *pcontext, struct pipe_transfer *ptx)
     {
     	struct nv50_transfer *tx = (struct nv50_transfer *)ptx;
     	unsigned flags = 0;
    @@ -236,7 +237,7 @@ nv50_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
     }
     
     static void
    -nv50_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
    +nv50_transfer_unmap(struct pipe_context *pcontext, struct pipe_transfer *ptx)
     {
     	struct nv50_transfer *tx = (struct nv50_transfer *)ptx;
     
    @@ -244,12 +245,12 @@ nv50_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
     }
     
     void
    -nv50_transfer_init_screen_functions(struct pipe_screen *pscreen)
    +nv50_init_transfer_functions(struct nv50_context *nv50)
     {
    -	pscreen->get_tex_transfer = nv50_transfer_new;
    -	pscreen->tex_transfer_destroy = nv50_transfer_del;
    -	pscreen->transfer_map = nv50_transfer_map;
    -	pscreen->transfer_unmap = nv50_transfer_unmap;
    +	nv50->pipe.get_tex_transfer = nv50_transfer_new;
    +	nv50->pipe.tex_transfer_destroy = nv50_transfer_del;
    +	nv50->pipe.transfer_map = nv50_transfer_map;
    +	nv50->pipe.transfer_unmap = nv50_transfer_unmap;
     }
     
     void
    diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
    index 923e1e541ff..8606c0004e5 100644
    --- a/src/gallium/drivers/r300/r300_context.c
    +++ b/src/gallium/drivers/r300/r300_context.c
    @@ -34,6 +34,7 @@
     #include "r300_screen.h"
     #include "r300_state_invariant.h"
     #include "r300_texture.h"
    +#include "r300_transfer.h"
     
     #include "radeon_winsys.h"
     
    @@ -209,6 +210,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
     
         r300_init_query_functions(r300);
     
    +    r300_init_transfer_functions(r300);
    +
         /* r300_init_surface_functions(r300); */
     
         r300_init_state_functions(r300);
    diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
    index 3b70bcda827..7b86669888b 100644
    --- a/src/gallium/drivers/r300/r300_context.h
    +++ b/src/gallium/drivers/r300/r300_context.h
    @@ -405,6 +405,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
     struct draw_stage* r300_draw_stage(struct r300_context* r300);
     void r300_init_state_functions(struct r300_context* r300);
     void r300_init_surface_functions(struct r300_context* r300);
    +void r300_init_tex_functions( struct pipe_context *pipe );
     
     static INLINE boolean CTX_DBG_ON(struct r300_context * ctx, unsigned flags)
     {
    diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
    index a249e8b36be..c67cc868713 100644
    --- a/src/gallium/drivers/r300/r300_reg.h
    +++ b/src/gallium/drivers/r300/r300_reg.h
    @@ -540,7 +540,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
     #       define R300_PVS_FIRST_INST(x)            ((x) << 0)
     #       define R300_PVS_XYZW_VALID_INST(x)       ((x) << 10)
     #       define R300_PVS_LAST_INST(x)             ((x) << 20)
    -/* Addresses are relative the the vertex program parameters area. */
    +/* Addresses are relative to the vertex program parameters area. */
     #define R300_VAP_PVS_CONST_CNTL             0x22D4
     #       define R300_PVS_CONST_BASE_OFFSET_SHIFT  0
     #       define R300_PVS_MAX_CONST_ADDR_SHIFT     16
    @@ -1857,7 +1857,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
      * The destination register index is in FPI1 (color) and FPI3 (alpha)
      * together with enable bits.
      * There are separate enable bits for writing into temporary registers
    - * (DSTC_REG_* /DSTA_REG) and and program output registers (DSTC_OUTPUT_*
    + * (DSTC_REG_* /DSTA_REG) and program output registers (DSTC_OUTPUT_*
      * /DSTA_OUTPUT). You can write to both at once, or not write at all (the
      * same index must be used for both).
      *
    diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
    index 9c001ae186d..ccf39876a59 100644
    --- a/src/gallium/drivers/r300/r300_render.c
    +++ b/src/gallium/drivers/r300/r300_render.c
    @@ -274,14 +274,14 @@ static void r300_emit_draw_elements(struct r300_context *r300,
     #endif
         CS_LOCALS(r300);
     
    -    assert((start * indexSize)  % 4 == 0);
    +    assert((start * indexSize) % 4 == 0);
         assert(count < (1 << 24));
     
    +    maxIndex = MIN3(maxIndex, r300->vertex_buffer_max_index, count - minIndex);
    +
         DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n",
             count, minIndex, maxIndex);
     
    -    maxIndex = MIN2(maxIndex, r300->vertex_buffer_max_index);
    -
         if (alt_num_verts) {
             BEGIN_CS(15);
             OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
    diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
    index 5880eecd5fe..64d1909a382 100644
    --- a/src/gallium/drivers/r300/r300_screen.c
    +++ b/src/gallium/drivers/r300/r300_screen.c
    @@ -27,7 +27,6 @@
     
     #include "r300_context.h"
     #include "r300_texture.h"
    -#include "r300_transfer.h"
     
     #include "radeon_winsys.h"
     #include "r300_winsys.h"
    @@ -290,7 +289,6 @@ struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys)
         r300screen->screen.context_create = r300_create_context;
     
         r300_init_screen_texture_functions(&r300screen->screen);
    -    r300_init_screen_transfer_functions(&r300screen->screen);
         u_simple_screen_init(&r300screen->screen);
     
         return &r300screen->screen;
    diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
    index d73ec78fda8..d4b7376f126 100644
    --- a/src/gallium/drivers/r300/r300_state.c
    +++ b/src/gallium/drivers/r300/r300_state.c
    @@ -1061,7 +1061,7 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
                                         const struct pipe_vertex_buffer* buffers)
     {
         struct r300_context* r300 = r300_context(pipe);
    -    unsigned i, max_index = ~0;
    +    unsigned i, max_index = (1 << 24) - 1;
     
         memcpy(r300->vertex_buffer, buffers,
             sizeof(struct pipe_vertex_buffer) * count);
    diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c
    index ec89681a3c0..495e3dee767 100644
    --- a/src/gallium/drivers/r300/r300_transfer.c
    +++ b/src/gallium/drivers/r300/r300_transfer.c
    @@ -118,15 +118,15 @@ static void r300_copy_into_tiled_texture(struct pipe_context *ctx,
     }
     
     static struct pipe_transfer*
    -r300_get_tex_transfer(struct pipe_screen *screen,
    +r300_get_tex_transfer(struct pipe_context *ctx,
                           struct pipe_texture *texture,
                           unsigned face, unsigned level, unsigned zslice,
                           enum pipe_transfer_usage usage, unsigned x, unsigned y,
                           unsigned w, unsigned h)
     {
         struct r300_texture *tex = (struct r300_texture *)texture;
    +    struct r300_screen *r300screen = r300_screen(ctx->screen);
         struct r300_transfer *trans;
    -    struct r300_screen *r300screen = r300_screen(screen);
         struct pipe_texture template;
     
         trans = CALLOC_STRUCT(r300_transfer);
    @@ -136,7 +136,7 @@ r300_get_tex_transfer(struct pipe_screen *screen,
             trans->transfer.usage = usage;
             trans->transfer.width = w;
             trans->transfer.height = h;
    -        trans->ctx = r300screen->ctx;
    +        trans->ctx = ctx;
             trans->x = x;
             trans->y = y;
             trans->level = level;
    @@ -174,8 +174,10 @@ r300_get_tex_transfer(struct pipe_screen *screen,
                 }
     
                 /* Create the temporary texture. */
    -            trans->detiled_texture =
    -                (struct r300_texture*)screen->texture_create(screen, &template);
    +            trans->detiled_texture = (struct r300_texture*)
    +               ctx->screen->texture_create(ctx->screen,
    +                                           &template);
    +
                 assert(!trans->detiled_texture->microtile &&
                        !trans->detiled_texture->macrotile);
     
    @@ -187,7 +189,7 @@ r300_get_tex_transfer(struct pipe_screen *screen,
                 if (usage & PIPE_TRANSFER_READ) {
                     /* We cannot map a tiled texture directly because the data is
                      * in a different order, therefore we do detiling using a blit. */
    -                r300_copy_from_tiled_texture(r300screen->ctx, trans);
    +                r300_copy_from_tiled_texture(ctx, trans);
                 }
             } else {
                 trans->transfer.x = x;
    @@ -203,7 +205,8 @@ r300_get_tex_transfer(struct pipe_screen *screen,
         return &trans->transfer;
     }
     
    -static void r300_tex_transfer_destroy(struct pipe_transfer *trans)
    +static void r300_tex_transfer_destroy(struct pipe_context *ctx,
    +                                      struct pipe_transfer *trans)
     {
         struct r300_transfer *r300transfer = r300_transfer(trans);
     
    @@ -219,7 +222,7 @@ static void r300_tex_transfer_destroy(struct pipe_transfer *trans)
         FREE(trans);
     }
     
    -static void* r300_transfer_map(struct pipe_screen *screen,
    +static void* r300_transfer_map(struct pipe_context *ctx,
                                    struct pipe_transfer *transfer)
     {
         struct r300_transfer *r300transfer = r300_transfer(transfer);
    @@ -230,12 +233,12 @@ static void* r300_transfer_map(struct pipe_screen *screen,
         if (r300transfer->detiled_texture) {
             /* The detiled texture is of the same size as the region being mapped
              * (no offset needed). */
    -        return pipe_buffer_map(screen,
    +        return pipe_buffer_map(ctx->screen,
                                    r300transfer->detiled_texture->buffer,
                                    pipe_transfer_buffer_flags(transfer));
         } else {
             /* Tiling is disabled. */
    -        map = pipe_buffer_map(screen, tex->buffer,
    +        map = pipe_buffer_map(ctx->screen, tex->buffer,
                                   pipe_transfer_buffer_flags(transfer));
     
             if (!map) {
    @@ -248,23 +251,26 @@ static void* r300_transfer_map(struct pipe_screen *screen,
         }
     }
     
    -static void r300_transfer_unmap(struct pipe_screen *screen,
    +static void r300_transfer_unmap(struct pipe_context *ctx,
                                     struct pipe_transfer *transfer)
     {
         struct r300_transfer *r300transfer = r300_transfer(transfer);
         struct r300_texture *tex = (struct r300_texture*)transfer->texture;
     
         if (r300transfer->detiled_texture) {
    -        pipe_buffer_unmap(screen, r300transfer->detiled_texture->buffer);
    +        pipe_buffer_unmap(ctx->screen, r300transfer->detiled_texture->buffer);
         } else {
    -        pipe_buffer_unmap(screen, tex->buffer);
    +        pipe_buffer_unmap(ctx->screen, tex->buffer);
         }
     }
     
    -void r300_init_screen_transfer_functions(struct pipe_screen *screen)
    +
    +void r300_init_transfer_functions( struct r300_context *r300ctx )
     {
    -    screen->get_tex_transfer = r300_get_tex_transfer;
    -    screen->tex_transfer_destroy = r300_tex_transfer_destroy;
    -    screen->transfer_map = r300_transfer_map;
    -    screen->transfer_unmap = r300_transfer_unmap;
    +   struct pipe_context *ctx = &r300ctx->context;
    +
    +   ctx->get_tex_transfer = r300_get_tex_transfer;
    +   ctx->tex_transfer_destroy = r300_tex_transfer_destroy;
    +   ctx->transfer_map = r300_transfer_map;
    +   ctx->transfer_unmap = r300_transfer_unmap;
     }
    diff --git a/src/gallium/drivers/r300/r300_transfer.h b/src/gallium/drivers/r300/r300_transfer.h
    index 60d1d3dc85c..79baf6d0480 100644
    --- a/src/gallium/drivers/r300/r300_transfer.h
    +++ b/src/gallium/drivers/r300/r300_transfer.h
    @@ -26,6 +26,8 @@
     
     #include "pipe/p_screen.h"
     
    -void r300_init_screen_transfer_functions(struct pipe_screen *screen);
    +struct r300_context;
    +
    +void r300_init_transfer_functions(struct r300_context *r300ctx);
     
     #endif
    diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile
    index 1c6e4ae0768..239655d628b 100644
    --- a/src/gallium/drivers/softpipe/Makefile
    +++ b/src/gallium/drivers/softpipe/Makefile
    @@ -8,6 +8,7 @@ C_SOURCES = \
     	sp_fs_sse.c \
     	sp_buffer.c \
     	sp_clear.c \
    +	sp_fence.c \
     	sp_flush.c \
     	sp_query.c \
     	sp_context.c \
    diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript
    index 27ab00b036e..9949a53adfd 100644
    --- a/src/gallium/drivers/softpipe/SConscript
    +++ b/src/gallium/drivers/softpipe/SConscript
    @@ -11,6 +11,7 @@ softpipe = env.ConvenienceLibrary(
     		'sp_clear.c',
     		'sp_context.c',
     		'sp_draw_arrays.c',
    +		'sp_fence.c',
     		'sp_flush.c',
     		'sp_prim_vbuf.c',
     		'sp_setup.c',
    diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
    index 891a903e4f1..937a573092f 100644
    --- a/src/gallium/drivers/softpipe/sp_context.c
    +++ b/src/gallium/drivers/softpipe/sp_context.c
    @@ -44,6 +44,7 @@
     #include "sp_surface.h"
     #include "sp_tile_cache.h"
     #include "sp_tex_tile_cache.h"
    +#include "sp_texture.h"
     #include "sp_query.h"
     
     
    @@ -277,6 +278,7 @@ softpipe_create_context( struct pipe_screen *screen,
        softpipe->pipe.is_buffer_referenced = softpipe_is_buffer_referenced;
     
        softpipe_init_query_funcs( softpipe );
    +   softpipe_init_texture_funcs( &softpipe->pipe );
     
        softpipe->pipe.render_condition = softpipe_render_condition;
     
    @@ -285,13 +287,13 @@ softpipe_create_context( struct pipe_screen *screen,
         * Must be before quad stage setup!
         */
        for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
    -      softpipe->cbuf_cache[i] = sp_create_tile_cache( screen );
    -   softpipe->zsbuf_cache = sp_create_tile_cache( screen );
    +      softpipe->cbuf_cache[i] = sp_create_tile_cache( &softpipe->pipe );
    +   softpipe->zsbuf_cache = sp_create_tile_cache( &softpipe->pipe );
     
        for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
    -      softpipe->tex_cache[i] = sp_create_tex_tile_cache( screen );
    +      softpipe->tex_cache[i] = sp_create_tex_tile_cache( &softpipe->pipe );
        for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
    -      softpipe->vertex_tex_cache[i] = sp_create_tex_tile_cache(screen);
    +      softpipe->vertex_tex_cache[i] = sp_create_tex_tile_cache( &softpipe->pipe );
        }
     
        /* setup quad rendering stages */
    diff --git a/src/gallium/drivers/softpipe/sp_fence.c b/src/gallium/drivers/softpipe/sp_fence.c
    new file mode 100644
    index 00000000000..66c52141132
    --- /dev/null
    +++ b/src/gallium/drivers/softpipe/sp_fence.c
    @@ -0,0 +1,70 @@
    +/**************************************************************************
    + *
    + * Copyright 2010 VMware, Inc.
    + * All Rights Reserved.
    + *
    + * Permission is hereby granted, free of charge, to any person obtaining a
    + * copy of this software and associated documentation files (the
    + * "Software"), to deal in the Software without restriction, including
    + * without limitation the rights to use, copy, modify, merge, publish,
    + * distribute, sub license, and/or sell copies of the Software, and to
    + * permit persons to whom the Software is furnished to do so, subject to
    + * the following conditions:
    + *
    + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
    + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
    + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
    + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
    + * USE OR OTHER DEALINGS IN THE SOFTWARE.
    + *
    + * The above copyright notice and this permission notice (including the
    + * next paragraph) shall be included in all copies or substantial portions
    + * of the Software.
    + *
    + **************************************************************************/
    +
    +
    +#include "pipe/p_screen.h"
    +#include "util/u_debug.h"
    +#include "sp_fence.h"
    +
    +/* fence_reference hook: only asserts that no fence object is involved. */
    +static void
    +softpipe_fence_reference(struct pipe_screen *screen,
    +                         struct pipe_fence_handle **ptr,
    +                         struct pipe_fence_handle *fence)
    +{
    +   assert(!*ptr);    /* the caller's slot must not already hold a fence */
    +   assert(!fence);   /* and no new fence may be handed in */
    +}
    +
    +/* fence_signalled hook: expects a NULL fence and unconditionally returns 0. */
    +static int
    +softpipe_fence_signalled(struct pipe_screen *screen,
    +                         struct pipe_fence_handle *fence,
    +                         unsigned flags)
    +{
    +   assert(!fence);
    +   return 0;   /* NOTE(review): 0 presumably means "signalled" - confirm against pipe_screen docs */
    +}
    +
    +/* fence_finish hook: expects a NULL fence and unconditionally returns 0. */
    +static int
    +softpipe_fence_finish(struct pipe_screen *screen,
    +                      struct pipe_fence_handle *fence,
    +                      unsigned flags)
    +{
    +   assert(!fence);
    +   return 0;   /* NOTE(review): 0 presumably means "finished" - confirm against pipe_screen docs */
    +}
    +
    +/* Point the screen's three fence callbacks at the static hooks in this file. */
    +void
    +softpipe_init_screen_fence_funcs(struct pipe_screen *screen)
    +{
    +   screen->fence_reference = softpipe_fence_reference;
    +   screen->fence_finish = softpipe_fence_finish;
    +   screen->fence_signalled = softpipe_fence_signalled;
    +}
    diff --git a/src/gallium/drivers/softpipe/sp_fence.h b/src/gallium/drivers/softpipe/sp_fence.h
    new file mode 100644
    index 00000000000..39c33243bd5
    --- /dev/null
    +++ b/src/gallium/drivers/softpipe/sp_fence.h
    @@ -0,0 +1,40 @@
    +/**************************************************************************
    + *
    + * Copyright 2010 VMware, Inc.
    + * All Rights Reserved.
    + *
    + * Permission is hereby granted, free of charge, to any person obtaining a
    + * copy of this software and associated documentation files (the
    + * "Software"), to deal in the Software without restriction, including
    + * without limitation the rights to use, copy, modify, merge, publish,
    + * distribute, sub license, and/or sell copies of the Software, and to
    + * permit persons to whom the Software is furnished to do so, subject to
    + * the following conditions:
    + *
    + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
    + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
    + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
    + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
    + * USE OR OTHER DEALINGS IN THE SOFTWARE.
    + *
    + * The above copyright notice and this permission notice (including the
    + * next paragraph) shall be included in all copies or substantial portions
    + * of the Software.
    + *
    + **************************************************************************/
    +
    +
    +#ifndef SP_FENCE_H_
    +#define SP_FENCE_H_
    +
    +
    +struct pipe_screen;
    +
    +/* Installs softpipe's fence_reference/fence_finish/fence_signalled hooks. */
    +void
    +softpipe_init_screen_fence_funcs(struct pipe_screen *screen);
    +
    +
    +#endif /* SP_FENCE_H_ */
    diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c
    index 38dea13c66b..508fe8f764d 100644
    --- a/src/gallium/drivers/softpipe/sp_flush.c
    +++ b/src/gallium/drivers/softpipe/sp_flush.c
    @@ -93,9 +93,9 @@ softpipe_flush( struct pipe_context *pipe,
           static unsigned frame_no = 1;
           static char filename[256];
           util_snprintf(filename, sizeof(filename), "cbuf_%u.bmp", frame_no);
    -      debug_dump_surface_bmp(filename, softpipe->framebuffer.cbufs[0]);
    +      debug_dump_surface_bmp(&softpipe->pipe, filename, softpipe->framebuffer.cbufs[0]);
           util_snprintf(filename, sizeof(filename), "zsbuf_%u.bmp", frame_no);
    -      debug_dump_surface_bmp(filename, softpipe->framebuffer.zsbuf);
    +      debug_dump_surface_bmp(&softpipe->pipe, filename, softpipe->framebuffer.zsbuf);
           ++frame_no;
        }
     #endif
    diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
    index 27fa126b7c3..67e2c8f8bc4 100644
    --- a/src/gallium/drivers/softpipe/sp_fs_exec.c
    +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
    @@ -145,8 +145,13 @@ exec_run( const struct sp_fragment_shader *base,
              case TGSI_SEMANTIC_COLOR:
                 {
                    uint cbuf = sem_index[i];
    +
    +               assert(sizeof(quad->output.color[cbuf]) ==
    +                      sizeof(machine->Outputs[i]));
    +
    +               /* copy float[4][4] result */
                    memcpy(quad->output.color[cbuf],
    -                      &machine->Outputs[i].xyzw[0].f[0],
    +                      &machine->Outputs[i],
                           sizeof(quad->output.color[0]) );
                 }
                 break;
    diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c
    index acee2136706..daa158df7c4 100644
    --- a/src/gallium/drivers/softpipe/sp_fs_sse.c
    +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c
    @@ -156,8 +156,13 @@ fs_sse_run( const struct sp_fragment_shader *base,
              case TGSI_SEMANTIC_COLOR:
                 {
                    uint cbuf = sem_index[i];
    +
    +               assert(sizeof(quad->output.color[cbuf]) ==
    +                      sizeof(machine->Outputs[i]));
    +
    +               /* copy float[4][4] result */
                    memcpy(quad->output.color[cbuf],
    -                      &machine->Outputs[i].xyzw[0].f[0],
    +                      &machine->Outputs[i],
                           sizeof(quad->output.color[0]) );
                 }
                 break;
    diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
    index e0a2ef604e9..d62bfa3d633 100644
    --- a/src/gallium/drivers/softpipe/sp_screen.c
    +++ b/src/gallium/drivers/softpipe/sp_screen.c
    @@ -36,6 +36,7 @@
     #include "sp_screen.h"
     #include "sp_context.h"
     #include "sp_buffer.h"
    +#include "sp_fence.h"
     #include "sp_public.h"
     
     
    @@ -239,6 +240,7 @@ softpipe_create_screen(struct sw_winsys *winsys)
     
        softpipe_init_screen_texture_funcs(&screen->base);
        softpipe_init_screen_buffer_funcs(&screen->base);
    +   softpipe_init_screen_fence_funcs(&screen->base);
     
        return &screen->base;
     }
    diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
    index ef7ccf41898..fa9e19b282b 100644
    --- a/src/gallium/drivers/softpipe/sp_tex_sample.c
    +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
    @@ -1614,7 +1614,6 @@ sample_cube(struct tgsi_sampler *tgsi_sampler,
        struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
        unsigned j;
        float ssss[4], tttt[4];
    -   unsigned face;
     
        /*
          major axis
    @@ -1628,7 +1627,8 @@ sample_cube(struct tgsi_sampler *tgsi_sampler,
          -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
        */
     
    -   /* First choose the cube face.
    +   /* Choose the cube face and compute new s/t coords for the 2D face.
    +    *
         * Use the same cube face for all four pixels in the quad.
         *
         * This isn't ideal, but if we want to use a different cube face
    @@ -1647,82 +1647,34 @@ sample_cube(struct tgsi_sampler *tgsi_sampler,
           const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
     
           if (arx >= ary && arx >= arz) {
    -         if (rx >= 0.0F) {
    -            face = PIPE_TEX_FACE_POS_X;
    -         }
    -         else {
    -            face = PIPE_TEX_FACE_NEG_X;
    +         float sign = (rx >= 0.0F) ? 1.0F : -1.0F;
    +         uint face = (rx >= 0.0F) ? PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X;
    +         for (j = 0; j < QUAD_SIZE; j++) {
    +            const float ima = -0.5F / fabsf(s[j]);
    +            ssss[j] = sign *  p[j] * ima + 0.5F;
    +            tttt[j] =         t[j] * ima + 0.5F;
    +            samp->faces[j] = face;
              }
           }
           else if (ary >= arx && ary >= arz) {
    -         if (ry >= 0.0F) {
    -            face = PIPE_TEX_FACE_POS_Y;
    -         }
    -         else {
    -            face = PIPE_TEX_FACE_NEG_Y;
    +         float sign = (ry >= 0.0F) ? 1.0F : -1.0F;
    +         uint face = (ry >= 0.0F) ? PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y;
    +         for (j = 0; j < QUAD_SIZE; j++) {
    +            const float ima = -0.5F / fabsf(t[j]);
    +            ssss[j] =        -s[j] * ima + 0.5F;
    +            tttt[j] = sign * -p[j] * ima + 0.5F;
    +            samp->faces[j] = face;
              }
           }
           else {
    -         if (rz > 0.0F) {
    -            face = PIPE_TEX_FACE_POS_Z;
    +         float sign = (rz >= 0.0F) ? 1.0F : -1.0F;   /* -1 mirrors s on the -Z face */
    +         uint face = (rz >= 0.0F) ? PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z;
    +         for (j = 0; j < QUAD_SIZE; j++) {
    +            const float ima = -0.5F / fabsf(p[j]);   /* float literal keeps the divide in single precision */
    +            ssss[j] = sign * -s[j] * ima + 0.5F;
    +            tttt[j] =         t[j] * ima + 0.5F;
    +            samp->faces[j] = face;
              }
    -         else {
    -            face = PIPE_TEX_FACE_NEG_Z;
    -         }
    -      }
    -   }
    -
    -   /* Now compute the 2D _face_ texture coords from the
    -    * 3D _cube_ texture coords.
    -    */
    -   for (j = 0; j < QUAD_SIZE; j++) {
    -      const float rx = s[j], ry = t[j], rz = p[j];
    -      const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
    -      float sc, tc, ma;
    -
    -      switch (face) {
    -      case PIPE_TEX_FACE_POS_X:
    -         sc = -rz;
    -         tc = -ry;
    -         ma = arx;
    -         break;
    -      case PIPE_TEX_FACE_NEG_X:
    -         sc = rz;
    -         tc = -ry;
    -         ma = arx;
    -         break;
    -      case PIPE_TEX_FACE_POS_Y:
    -         sc = rx;
    -         tc = rz;
    -         ma = ary;
    -         break;
    -      case PIPE_TEX_FACE_NEG_Y:
    -         sc = rx;
    -         tc = -rz;
    -         ma = ary;
    -         break;
    -      case PIPE_TEX_FACE_POS_Z:
    -         sc = rx;
    -         tc = -ry;
    -         ma = arz;
    -         break;
    -      case PIPE_TEX_FACE_NEG_Z:
    -         sc = -rx;
    -         tc = -ry;
    -         ma = arz;
    -         break;
    -      default:
    -         assert(0 && "bad cube face");
    -         sc = 0.0F;
    -         tc = 0.0F;
    -         ma = 0.0F;
    -      }
    -
    -      {
    -	 const float ima = 1.0 / ma;
    -	 ssss[j] = ( sc * ima + 1.0F ) * 0.5F;
    -	 tttt[j] = ( tc * ima + 1.0F ) * 0.5F;
    -	 samp->faces[j] = face;
           }
        }
     
    diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
    index dfa002a79b4..6594514c38f 100644
    --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
    +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
    @@ -43,14 +43,14 @@
        
     
     struct softpipe_tex_tile_cache *
    -sp_create_tex_tile_cache( struct pipe_screen *screen )
    +sp_create_tex_tile_cache( struct pipe_context *pipe )
     {
        struct softpipe_tex_tile_cache *tc;
        uint pos;
     
        tc = CALLOC_STRUCT( softpipe_tex_tile_cache );
        if (tc) {
    -      tc->screen = screen;
    +      tc->pipe = pipe;
           for (pos = 0; pos < NUM_ENTRIES; pos++) {
              tc->entries[pos].addr.bits.invalid = 1;
           }
    @@ -63,19 +63,16 @@ sp_create_tex_tile_cache( struct pipe_screen *screen )
     void
     sp_destroy_tex_tile_cache(struct softpipe_tex_tile_cache *tc)
     {
    -   struct pipe_screen *screen;
        uint pos;
     
        for (pos = 0; pos < NUM_ENTRIES; pos++) {
           /*assert(tc->entries[pos].x < 0);*/
        }
        if (tc->transfer) {
    -      screen = tc->transfer->texture->screen;
    -      screen->tex_transfer_destroy(tc->transfer);
    +      tc->pipe->tex_transfer_destroy(tc->pipe, tc->transfer);
        }
        if (tc->tex_trans) {
    -      screen = tc->tex_trans->texture->screen;
    -      screen->tex_transfer_destroy(tc->tex_trans);
    +      tc->pipe->tex_transfer_destroy(tc->pipe, tc->tex_trans);
        }
     
        FREE( tc );
    @@ -88,7 +85,7 @@ void
     sp_tex_tile_cache_map_transfers(struct softpipe_tex_tile_cache *tc)
     {
        if (tc->tex_trans && !tc->tex_trans_map)
    -      tc->tex_trans_map = tc->screen->transfer_map(tc->screen, tc->tex_trans);
    +      tc->tex_trans_map = tc->pipe->transfer_map(tc->pipe, tc->tex_trans);
     }
     
     
    @@ -96,7 +93,7 @@ void
     sp_tex_tile_cache_unmap_transfers(struct softpipe_tex_tile_cache *tc)
     {
        if (tc->tex_trans_map) {
    -      tc->screen->transfer_unmap(tc->screen, tc->tex_trans);
    +      tc->pipe->transfer_unmap(tc->pipe, tc->tex_trans);
           tc->tex_trans_map = NULL;
        }
     }
    @@ -134,14 +131,12 @@ sp_tex_tile_cache_set_sampler_view(struct softpipe_tex_tile_cache *tc,
           pipe_texture_reference(&tc->texture, texture);
     
           if (tc->tex_trans) {
    -         struct pipe_screen *screen = tc->tex_trans->texture->screen;
    -         
              if (tc->tex_trans_map) {
    -            screen->transfer_unmap(screen, tc->tex_trans);
    +            tc->pipe->transfer_unmap(tc->pipe, tc->tex_trans);
                 tc->tex_trans_map = NULL;
              }
     
    -         screen->tex_transfer_destroy(tc->tex_trans);
    +         tc->pipe->tex_transfer_destroy(tc->pipe, tc->tex_trans);
              tc->tex_trans = NULL;
           }
     
    @@ -213,7 +208,6 @@ const struct softpipe_tex_cached_tile *
     sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, 
                             union tex_tile_address addr )
     {
    -   struct pipe_screen *screen = tc->screen;
        struct softpipe_tex_cached_tile *tile;
        
        tile = tc->entries + tex_cache_pos( addr );
    @@ -241,16 +235,16 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
     
              if (tc->tex_trans) {
                 if (tc->tex_trans_map) {
    -               tc->screen->transfer_unmap(tc->screen, tc->tex_trans);
    +               tc->pipe->transfer_unmap(tc->pipe, tc->tex_trans);
                    tc->tex_trans_map = NULL;
                 }
     
    -            screen->tex_transfer_destroy(tc->tex_trans);
    +            tc->pipe->tex_transfer_destroy(tc->pipe, tc->tex_trans);
                 tc->tex_trans = NULL;
              }
     
              tc->tex_trans = 
    -            screen->get_tex_transfer(screen, tc->texture, 
    +            tc->pipe->get_tex_transfer(tc->pipe, tc->texture, 
                                          addr.bits.face, 
                                          addr.bits.level, 
                                          addr.bits.z, 
    @@ -258,7 +252,7 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
                                          u_minify(tc->texture->width0, addr.bits.level),
                                          u_minify(tc->texture->height0, addr.bits.level));
              
    -         tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans);
    +         tc->tex_trans_map = tc->pipe->transfer_map(tc->pipe, tc->tex_trans);
     
              tc->tex_face = addr.bits.face;
              tc->tex_level = addr.bits.level;
    @@ -266,7 +260,8 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
           }
     
           /* get tile from the transfer (view into texture) */
    -      pipe_get_tile_swizzle(tc->tex_trans,
    +      pipe_get_tile_swizzle(tc->pipe,
    +                            tc->tex_trans,
                                 addr.bits.x * TILE_SIZE, 
                                 addr.bits.y * TILE_SIZE,
                                 TILE_SIZE,
    diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
    index f8770409d87..12ae7ba12d6 100644
    --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
    +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
    @@ -70,7 +70,7 @@ struct softpipe_tex_cached_tile
     
     struct softpipe_tex_tile_cache
     {
    -   struct pipe_screen *screen;
    +   struct pipe_context *pipe;
        struct pipe_transfer *transfer;
        void *transfer_map;
     
    @@ -94,7 +94,7 @@ struct softpipe_tex_tile_cache
     
     
     extern struct softpipe_tex_tile_cache *
    -sp_create_tex_tile_cache( struct pipe_screen *screen );
    +sp_create_tex_tile_cache( struct pipe_context *pipe );
     
     extern void
     sp_destroy_tex_tile_cache(struct softpipe_tex_tile_cache *tc);
    diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
    index 11d184effb9..da8529c154e 100644
    --- a/src/gallium/drivers/softpipe/sp_texture.c
    +++ b/src/gallium/drivers/softpipe/sp_texture.c
    @@ -256,7 +256,7 @@ softpipe_tex_surface_destroy(struct pipe_surface *surf)
      * \param height  height of region to read/write
      */
     static struct pipe_transfer *
    -softpipe_get_tex_transfer(struct pipe_screen *screen,
    +softpipe_get_tex_transfer(struct pipe_context *pipe,
                               struct pipe_texture *texture,
                               unsigned face, unsigned level, unsigned zslice,
                               enum pipe_transfer_usage usage,
    @@ -310,7 +310,8 @@ softpipe_get_tex_transfer(struct pipe_screen *screen,
      * softpipe_get_tex_transfer().
      */
     static void 
    -softpipe_tex_transfer_destroy(struct pipe_transfer *transfer)
    +softpipe_tex_transfer_destroy(struct pipe_context *pipe,
    +                              struct pipe_transfer *transfer)
     {
        /* Effectively do the texture_update work here - if texture images
         * needed post-processing to put them into hardware layout, this is
    @@ -326,7 +327,7 @@ softpipe_tex_transfer_destroy(struct pipe_transfer *transfer)
      * Create memory mapping for given pipe_transfer object.
      */
     static void *
    -softpipe_transfer_map( struct pipe_screen *screen,
    +softpipe_transfer_map( struct pipe_context *pipe,
                            struct pipe_transfer *transfer )
     {
        ubyte *map, *xfer_map;
    @@ -339,7 +340,7 @@ softpipe_transfer_map( struct pipe_screen *screen,
     
        if (spt->dt) {
           /* display target */
    -      struct sw_winsys *winsys = softpipe_screen(screen)->winsys;
    +      struct sw_winsys *winsys = softpipe_screen(pipe->screen)->winsys;
     
           map = winsys->displaytarget_map(winsys, spt->dt,
                                           pipe_transfer_buffer_flags(transfer));
    @@ -359,7 +360,7 @@ softpipe_transfer_map( struct pipe_screen *screen,
           /* Do something to notify sharing contexts of a texture change.
            * In softpipe, that would mean flushing the texture cache.
            */
    -      softpipe_screen(screen)->timestamp++;
    +      softpipe_screen(pipe->screen)->timestamp++;
        }
     
        xfer_map = map + softpipe_transfer(transfer)->offset +
    @@ -374,7 +375,7 @@ softpipe_transfer_map( struct pipe_screen *screen,
      * Unmap memory mapping for given pipe_transfer object.
      */
     static void
    -softpipe_transfer_unmap(struct pipe_screen *screen,
    +softpipe_transfer_unmap(struct pipe_context *pipe,
                             struct pipe_transfer *transfer)
     {
        struct softpipe_texture *spt;
    @@ -384,7 +385,7 @@ softpipe_transfer_unmap(struct pipe_screen *screen,
     
        if (spt->dt) {
           /* display target */
    -      struct sw_winsys *winsys = softpipe_screen(screen)->winsys;
    +      struct sw_winsys *winsys = softpipe_screen(pipe->screen)->winsys;
           winsys->displaytarget_unmap(winsys, spt->dt);
        }
     
    @@ -447,6 +448,15 @@ softpipe_video_surface_destroy(struct pipe_video_surface *vsfc)
     }
     
     
    +void
    +softpipe_init_texture_funcs(struct pipe_context *pipe)
    +{  /* Install softpipe's texture-transfer callbacks on the given context. */
    +   pipe->get_tex_transfer = softpipe_get_tex_transfer;
    +   pipe->tex_transfer_destroy = softpipe_tex_transfer_destroy;
    +   pipe->transfer_map = softpipe_transfer_map;       /* creates the memory mapping */
    +   pipe->transfer_unmap = softpipe_transfer_unmap;   /* releases that mapping */
    +}
    +
     void
     softpipe_init_screen_texture_funcs(struct pipe_screen *screen)
     {
    @@ -456,11 +466,6 @@ softpipe_init_screen_texture_funcs(struct pipe_screen *screen)
        screen->get_tex_surface = softpipe_get_tex_surface;
        screen->tex_surface_destroy = softpipe_tex_surface_destroy;
     
    -   screen->get_tex_transfer = softpipe_get_tex_transfer;
    -   screen->tex_transfer_destroy = softpipe_tex_transfer_destroy;
    -   screen->transfer_map = softpipe_transfer_map;
    -   screen->transfer_unmap = softpipe_transfer_unmap;
    -
        screen->video_surface_create = softpipe_video_surface_create;
        screen->video_surface_destroy = softpipe_video_surface_destroy;
     }
    diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h
    index 1c8636d1d56..c0e6ba8a869 100644
    --- a/src/gallium/drivers/softpipe/sp_texture.h
    +++ b/src/gallium/drivers/softpipe/sp_texture.h
    @@ -107,5 +107,8 @@ softpipe_video_surface(struct pipe_video_surface *pvs)
     extern void
     softpipe_init_screen_texture_funcs(struct pipe_screen *screen);
     
    +extern void
    +softpipe_init_texture_funcs(struct pipe_context *pipe);   /* per-context transfer hooks */
    +
     
     #endif /* SP_TEXTURE */
    diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c
    index aedfdf1b469..1c3c2667d73 100644
    --- a/src/gallium/drivers/softpipe/sp_tile_cache.c
    +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c
    @@ -79,20 +79,20 @@ clear_clear_flag(uint *bitvec, union tile_address addr)
        
     
     struct softpipe_tile_cache *
    -sp_create_tile_cache( struct pipe_screen *screen )
    +sp_create_tile_cache( struct pipe_context *pipe )
     {
        struct softpipe_tile_cache *tc;
        uint pos;
        int maxLevels, maxTexSize;
     
        /* sanity checking: max sure MAX_WIDTH/HEIGHT >= largest texture image */
    -   maxLevels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS);
    +   maxLevels = pipe->screen->get_param(pipe->screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS);
        maxTexSize = 1 << (maxLevels - 1);
        assert(MAX_WIDTH >= maxTexSize);
     
        tc = CALLOC_STRUCT( softpipe_tile_cache );
        if (tc) {
    -      tc->screen = screen;
    +      tc->pipe = pipe;
           for (pos = 0; pos < NUM_ENTRIES; pos++) {
              tc->entries[pos].addr.bits.invalid = 1;
           }
    @@ -115,15 +115,13 @@ sp_create_tile_cache( struct pipe_screen *screen )
     void
     sp_destroy_tile_cache(struct softpipe_tile_cache *tc)
     {
    -   struct pipe_screen *screen;
        uint pos;
     
        for (pos = 0; pos < NUM_ENTRIES; pos++) {
           /*assert(tc->entries[pos].x < 0);*/
        }
        if (tc->transfer) {
    -      screen = tc->transfer->texture->screen;
    -      screen->tex_transfer_destroy(tc->transfer);
    +      tc->pipe->tex_transfer_destroy(tc->pipe, tc->transfer);
        }
     
        FREE( tc );
    @@ -137,27 +135,25 @@ void
     sp_tile_cache_set_surface(struct softpipe_tile_cache *tc,
                               struct pipe_surface *ps)
     {
    -   if (tc->transfer) {
    -      struct pipe_screen *screen = tc->transfer->texture->screen;
    +   struct pipe_context *pipe = tc->pipe;
     
    +   if (tc->transfer) {
           if (ps == tc->surface)
              return;
     
           if (tc->transfer_map) {
    -         screen->transfer_unmap(screen, tc->transfer);
    +         pipe->transfer_unmap(pipe, tc->transfer);
              tc->transfer_map = NULL;
           }
     
    -      screen->tex_transfer_destroy(tc->transfer);
    +      pipe->tex_transfer_destroy(pipe, tc->transfer);
           tc->transfer = NULL;
        }
     
        tc->surface = ps;
     
        if (ps) {
    -      struct pipe_screen *screen = ps->texture->screen;
    -
    -      tc->transfer = screen->get_tex_transfer(screen, ps->texture, ps->face,
    +      tc->transfer = pipe->get_tex_transfer(pipe, ps->texture, ps->face,
                                                   ps->level, ps->zslice,
                                                   PIPE_TRANSFER_READ_WRITE,
                                                   0, 0, ps->width, ps->height);
    @@ -187,7 +183,7 @@ void
     sp_tile_cache_map_transfers(struct softpipe_tile_cache *tc)
     {
        if (tc->transfer && !tc->transfer_map)
    -      tc->transfer_map = tc->screen->transfer_map(tc->screen, tc->transfer);
    +      tc->transfer_map = tc->pipe->transfer_map(tc->pipe, tc->transfer);
     }
     
     
    @@ -195,7 +191,7 @@ void
     sp_tile_cache_unmap_transfers(struct softpipe_tile_cache *tc)
     {
        if (tc->transfer_map) {
    -      tc->screen->transfer_unmap(tc->screen, tc->transfer);
    +      tc->pipe->transfer_unmap(tc->pipe, tc->transfer);
           tc->transfer_map = NULL;
        }
     }
    @@ -295,7 +291,8 @@ sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc)
              union tile_address addr = tile_address(x, y);
     
              if (is_clear_flag_set(tc->clear_flags, addr)) {
    -            pipe_put_tile_raw(pt,
    +            pipe_put_tile_raw(tc->pipe,
    +                              pt,
                                   x, y, TILE_SIZE, TILE_SIZE,
                                   tc->tile.data.color32, 0/*STRIDE*/);
     
    @@ -329,14 +326,14 @@ sp_flush_tile_cache(struct softpipe_tile_cache *tc)
              struct softpipe_cached_tile *tile = tc->entries + pos;
              if (!tile->addr.bits.invalid) {
                 if (tc->depth_stencil) {
    -               pipe_put_tile_raw(pt,
    +               pipe_put_tile_raw(tc->pipe, pt,
                                      tile->addr.bits.x * TILE_SIZE, 
                                      tile->addr.bits.y * TILE_SIZE, 
                                      TILE_SIZE, TILE_SIZE,
                                      tile->data.depth32, 0/*STRIDE*/);
                 }
                 else {
    -               pipe_put_tile_rgba(pt,
    +               pipe_put_tile_rgba(tc->pipe, pt,
                                       tile->addr.bits.x * TILE_SIZE, 
                                       tile->addr.bits.y * TILE_SIZE, 
                                       TILE_SIZE, TILE_SIZE,
    @@ -379,14 +376,14 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc,
           if (tile->addr.bits.invalid == 0) {
              /* put dirty tile back in framebuffer */
              if (tc->depth_stencil) {
    -            pipe_put_tile_raw(pt,
    +            pipe_put_tile_raw(tc->pipe, pt,
                                   tile->addr.bits.x * TILE_SIZE,
                                   tile->addr.bits.y * TILE_SIZE,
                                   TILE_SIZE, TILE_SIZE,
                                   tile->data.depth32, 0/*STRIDE*/);
              }
              else {
    -            pipe_put_tile_rgba(pt,
    +            pipe_put_tile_rgba(tc->pipe, pt,
                                    tile->addr.bits.x * TILE_SIZE,
                                    tile->addr.bits.y * TILE_SIZE,
                                    TILE_SIZE, TILE_SIZE,
    @@ -409,14 +406,14 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc,
           else {
              /* get new tile data from transfer */
              if (tc->depth_stencil) {
    -            pipe_get_tile_raw(pt,
    +            pipe_get_tile_raw(tc->pipe, pt,
                                   tile->addr.bits.x * TILE_SIZE, 
                                   tile->addr.bits.y * TILE_SIZE, 
                                   TILE_SIZE, TILE_SIZE,
                                   tile->data.depth32, 0/*STRIDE*/);
              }
              else {
    -            pipe_get_tile_rgba(pt,
    +            pipe_get_tile_rgba(tc->pipe, pt,
                                    tile->addr.bits.x * TILE_SIZE, 
                                    tile->addr.bits.y * TILE_SIZE,
                                    TILE_SIZE, TILE_SIZE,
    diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h
    index a12092702a6..753d8c0daac 100644
    --- a/src/gallium/drivers/softpipe/sp_tile_cache.h
    +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h
    @@ -80,7 +80,7 @@ struct softpipe_cached_tile
     
     struct softpipe_tile_cache
     {
    -   struct pipe_screen *screen;
    +   struct pipe_context *pipe;
        struct pipe_surface *surface;  /**< the surface we're caching */
        struct pipe_transfer *transfer;
        void *transfer_map;
    @@ -98,7 +98,7 @@ struct softpipe_tile_cache
     
     
     extern struct softpipe_tile_cache *
    -sp_create_tile_cache( struct pipe_screen *screen );
    +sp_create_tile_cache( struct pipe_context *pipe );
     
     extern void
     sp_destroy_tile_cache(struct softpipe_tile_cache *tc);
    diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c
    index d499ae6acc9..adb7840182b 100644
    --- a/src/gallium/drivers/svga/svga_context.c
    +++ b/src/gallium/drivers/svga/svga_context.c
    @@ -164,6 +164,8 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen,
        svga_init_constbuffer_functions(svga);
        svga_init_query_functions(svga);
     
    +   svga_init_texture_functions(&svga->pipe);
    +
        /* debug */
        svga->debug.no_swtnl = debug_get_bool_option("SVGA_NO_SWTNL", FALSE);
        svga->debug.force_swtnl = debug_get_bool_option("SVGA_FORCE_SWTNL", FALSE);
    diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c
    index 5b581debfc7..107e4a39620 100644
    --- a/src/gallium/drivers/svga/svga_screen_texture.c
    +++ b/src/gallium/drivers/svga/svga_screen_texture.c
    @@ -783,15 +783,17 @@ svga_surface_needs_propagation(struct pipe_surface *surf)
        return s->dirty && s->handle != tex->handle;
     }
     
    -
    +/* XXX: Still implementing this as if it was a screen function, but
    + * can now modify it to queue transfers on the context.
    + */
     static struct pipe_transfer *
    -svga_get_tex_transfer(struct pipe_screen *screen,
    -                     struct pipe_texture *texture,
    -                     unsigned face, unsigned level, unsigned zslice,
    -                     enum pipe_transfer_usage usage, unsigned x, unsigned y,
    -                     unsigned w, unsigned h)
    +svga_get_tex_transfer(struct pipe_context *pipe,
    +		      struct pipe_texture *texture,
    +		      unsigned face, unsigned level, unsigned zslice,
    +		      enum pipe_transfer_usage usage, unsigned x, unsigned y,
    +		      unsigned w, unsigned h)
     {
    -   struct svga_screen *ss = svga_screen(screen);
    +   struct svga_screen *ss = svga_screen(pipe->screen);
        struct svga_winsys_screen *sws = ss->sws;
        struct svga_transfer *st;
        unsigned nblocksx = util_format_get_nblocksx(texture->format, w);
    @@ -859,11 +861,14 @@ no_hwbuf:
     }
     
     
    +/* XXX: Still implementing this as if it was a screen function, but
    + * can now modify it to queue transfers on the context.
    + */
     static void *
    -svga_transfer_map( struct pipe_screen *screen,
    +svga_transfer_map( struct pipe_context *pipe,
                        struct pipe_transfer *transfer )
     {
    -   struct svga_screen *ss = svga_screen(screen);
    +   struct svga_screen *ss = svga_screen(pipe->screen);
        struct svga_winsys_screen *sws = ss->sws;
        struct svga_transfer *st = svga_transfer(transfer);
     
    @@ -877,11 +882,14 @@ svga_transfer_map( struct pipe_screen *screen,
     }
     
     
    +/* XXX: Still implementing this as if it was a screen function, but
    + * can now modify it to queue transfers on the context.
    + */
     static void
    -svga_transfer_unmap(struct pipe_screen *screen,
    +svga_transfer_unmap(struct pipe_context *pipe,
                         struct pipe_transfer *transfer)
     {
    -   struct svga_screen *ss = svga_screen(screen);
    +   struct svga_screen *ss = svga_screen(pipe->screen);
        struct svga_winsys_screen *sws = ss->sws;
        struct svga_transfer *st = svga_transfer(transfer);
        
    @@ -891,10 +899,11 @@ svga_transfer_unmap(struct pipe_screen *screen,
     
     
     static void
    -svga_tex_transfer_destroy(struct pipe_transfer *transfer)
    +svga_tex_transfer_destroy(struct pipe_context *pipe,
    +                          struct pipe_transfer *transfer)
     {
        struct svga_texture *tex = svga_texture(transfer->texture);
    -   struct svga_screen *ss = svga_screen(transfer->texture->screen);
    +   struct svga_screen *ss = svga_screen(pipe->screen);
        struct svga_winsys_screen *sws = ss->sws;
        struct svga_transfer *st = svga_transfer(transfer);
     
    @@ -911,6 +920,17 @@ svga_tex_transfer_destroy(struct pipe_transfer *transfer)
        FREE(st);
     }
     
    +
    +void
    +svga_init_texture_functions(struct pipe_context *pipe)
    +{
    +   pipe->get_tex_transfer = svga_get_tex_transfer;
    +   pipe->transfer_map = svga_transfer_map;
    +   pipe->transfer_unmap = svga_transfer_unmap;
    +   pipe->tex_transfer_destroy = svga_tex_transfer_destroy;
    +}
    +
    +
     void
     svga_screen_init_texture_functions(struct pipe_screen *screen)
     {
    @@ -920,10 +940,6 @@ svga_screen_init_texture_functions(struct pipe_screen *screen)
        screen->texture_destroy = svga_texture_destroy;
        screen->get_tex_surface = svga_get_tex_surface;
        screen->tex_surface_destroy = svga_tex_surface_destroy;
    -   screen->get_tex_transfer = svga_get_tex_transfer;
    -   screen->transfer_map = svga_transfer_map;
    -   screen->transfer_unmap = svga_transfer_unmap;
    -   screen->tex_transfer_destroy = svga_tex_transfer_destroy;
     }
     
     /*********************************************************************** 
    diff --git a/src/gallium/drivers/svga/svga_screen_texture.h b/src/gallium/drivers/svga/svga_screen_texture.h
    index ca6602b4369..96d035b12d8 100644
    --- a/src/gallium/drivers/svga/svga_screen_texture.h
    +++ b/src/gallium/drivers/svga/svga_screen_texture.h
    @@ -186,6 +186,9 @@ svga_surface_needs_propagation(struct pipe_surface *surf);
     extern void
     svga_screen_init_texture_functions(struct pipe_screen *screen);
     
    +void
    +svga_init_texture_functions(struct pipe_context *pipe);
    +
     enum SVGA3dSurfaceFormat
     svga_translate_format(enum pipe_format format);
     
    diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
    index 705ca29e8f5..4ee1bf2c353 100644
    --- a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
    +++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
    @@ -360,7 +360,9 @@ dump_dstreg(struct sh_dstreg dstreg,
        union {
           struct sh_reg reg;
           struct sh_dstreg dstreg;
    -   } u = { { 0 } };
    +   } u;
    +
    +   memset(&u, 0, sizeof(u));
     
        assert( (dstreg.modifier & (SVGA3DDSTMOD_SATURATE | SVGA3DDSTMOD_PARTIALPRECISION)) == dstreg.modifier );
     
    diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
    index f9555fb922c..5c24bd1f7df 100644
    --- a/src/gallium/drivers/trace/tr_context.c
    +++ b/src/gallium/drivers/trace/tr_context.c
    @@ -28,7 +28,9 @@
     #include "util/u_inlines.h"
     #include "util/u_memory.h"
     #include "util/u_simple_list.h"
    +#include "util/u_format.h"
     
    +#include "pipe/p_format.h"
     #include "pipe/p_screen.h"
     
     #include "tr_dump.h"
    @@ -1340,6 +1342,136 @@ trace_is_buffer_referenced( struct pipe_context *_pipe,
        return referenced;
     }
     
    +
    +/********************************************************************
    + * transfer
    + */
    +
    +
    +static struct pipe_transfer *
    +trace_context_get_tex_transfer(struct pipe_context *_context,
    +                              struct pipe_texture *_texture,
    +                              unsigned face, unsigned level,
    +                              unsigned zslice,
    +                              enum pipe_transfer_usage usage,
    +                              unsigned x, unsigned y, unsigned w, unsigned h)
    +{
    +   struct trace_context *tr_context = trace_context(_context);
    +   struct trace_texture *tr_tex = trace_texture(_texture);
    +   struct pipe_context *context = tr_context->pipe;
    +   struct pipe_texture *texture = tr_tex->texture;
    +   struct pipe_transfer *result = NULL;
    +
    +   assert(texture->screen == context->screen);
    +
    +   trace_dump_call_begin("pipe_context", "get_tex_transfer");
    +
    +   trace_dump_arg(ptr, context);
    +   trace_dump_arg(ptr, texture);
    +   trace_dump_arg(uint, face);
    +   trace_dump_arg(uint, level);
    +   trace_dump_arg(uint, zslice);
    +   trace_dump_arg(uint, usage);
    +
    +   trace_dump_arg(uint, x);
    +   trace_dump_arg(uint, y);
    +   trace_dump_arg(uint, w);
    +   trace_dump_arg(uint, h);
    +
    +   result = context->get_tex_transfer(context, texture, face, level, zslice, usage,
    +				      x, y, w, h);
    +
    +   trace_dump_ret(ptr, result);
    +
    +   trace_dump_call_end();
    +
    +   if (result)
    +      result = trace_transfer_create(tr_context, tr_tex, result);
    +
    +   return result;
    +}
    +
    +
    +static void
    +trace_context_tex_transfer_destroy(struct pipe_context *_context,
    +                                   struct pipe_transfer *_transfer)
    +{
    +   struct trace_context *tr_context = trace_context(_context);
    +   struct trace_transfer *tr_trans = trace_transfer(_transfer);
    +   struct pipe_context *context = tr_context->pipe;
    +   struct pipe_transfer *transfer = tr_trans->transfer;
    +
    +   trace_dump_call_begin("pipe_context", "tex_transfer_destroy");
    +
    +   trace_dump_arg(ptr, context);
    +   trace_dump_arg(ptr, transfer);
    +
    +   trace_dump_call_end();
    +
    +   trace_transfer_destroy(tr_context, tr_trans);
    +}
    +
    +
    +static void *
    +trace_context_transfer_map(struct pipe_context *_context,
    +                          struct pipe_transfer *_transfer)
    +{
    +   struct trace_context *tr_context = trace_context(_context);
    +   struct trace_transfer *tr_trans = trace_transfer(_transfer);
    +   struct pipe_context *context = tr_context->pipe;
    +   struct pipe_transfer *transfer = tr_trans->transfer;
    +   void *map;
    +
    +   map = context->transfer_map(context, transfer);
    +   if(map) {
    +      if(transfer->usage & PIPE_TRANSFER_WRITE) {
    +         assert(!tr_trans->map);
    +         tr_trans->map = map;
    +      }
    +   }
    +
    +   return map;
    +}
    +
    +
    +static void
    +trace_context_transfer_unmap(struct pipe_context *_context,
    +			     struct pipe_transfer *_transfer)
    +{
    +   struct trace_context *tr_ctx = trace_context(_context);
    +   struct trace_transfer *tr_trans = trace_transfer(_transfer);
    +   struct pipe_context *context = tr_ctx->pipe;
    +   struct pipe_transfer *transfer = tr_trans->transfer;
    +
    +   if(tr_trans->map) {
    +      size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->height) * transfer->stride;
    +
    +      trace_dump_call_begin("pipe_context", "transfer_write");
    +
    +      trace_dump_arg(ptr, context);
    +
    +      trace_dump_arg(ptr, transfer);
    +
    +      trace_dump_arg_begin("stride");
    +      trace_dump_uint(transfer->stride);
    +      trace_dump_arg_end();
    +
    +      trace_dump_arg_begin("data");
    +      trace_dump_bytes(tr_trans->map, size);
    +      trace_dump_arg_end();
    +
    +      trace_dump_arg_begin("size");
    +      trace_dump_uint(size);
    +      trace_dump_arg_end();
    +
    +      trace_dump_call_end();
    +
    +      tr_trans->map = NULL;
    +   }
    +
    +   context->transfer_unmap(context, transfer);
    +}
    +
     static const struct debug_named_value rbug_blocker_flags[] = {
        {"before", 1},
        {"after", 2},
    @@ -1426,6 +1558,11 @@ trace_context_create(struct trace_screen *tr_scr,
        tr_ctx->base.is_texture_referenced = trace_is_texture_referenced;
        tr_ctx->base.is_buffer_referenced = trace_is_buffer_referenced;
     
    +   tr_ctx->base.get_tex_transfer = trace_context_get_tex_transfer;
    +   tr_ctx->base.tex_transfer_destroy = trace_context_tex_transfer_destroy;
    +   tr_ctx->base.transfer_map = trace_context_transfer_map;
    +   tr_ctx->base.transfer_unmap = trace_context_transfer_unmap;
    +
        tr_ctx->pipe = pipe;
     
        trace_screen_add_to_list(tr_scr, contexts, tr_ctx);
    diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c
    index 07f9de253ec..53ab8c686d8 100644
    --- a/src/gallium/drivers/trace/tr_rbug.c
    +++ b/src/gallium/drivers/trace/tr_rbug.c
    @@ -219,7 +219,7 @@ trace_rbug_texture_read(struct trace_rbug *tr_rbug, struct rbug_header *header,
        struct trace_texture *tr_tex = NULL;
        struct tr_list *ptr;
     
    -   struct pipe_screen *screen = tr_scr->screen;
    +   struct pipe_context *context = tr_scr->private_context;
        struct pipe_texture *tex;
        struct pipe_transfer *t;
     
    @@ -239,12 +239,12 @@ trace_rbug_texture_read(struct trace_rbug *tr_rbug, struct rbug_header *header,
        }
     
        tex = tr_tex->texture;
    -   t = screen->get_tex_transfer(tr_scr->screen, tex,
    -                                gptr->face, gptr->level, gptr->zslice,
    -                                PIPE_TRANSFER_READ,
    -                                gptr->x, gptr->y, gptr->w, gptr->h);
    +   t = context->get_tex_transfer(context, tex,
    +				 gptr->face, gptr->level, gptr->zslice,
    +				 PIPE_TRANSFER_READ,
    +				 gptr->x, gptr->y, gptr->w, gptr->h);
     
    -   map = screen->transfer_map(screen, t);
    +   map = context->transfer_map(context, t);
     
        rbug_send_texture_read_reply(tr_rbug->con, serial,
                                     t->texture->format,
    @@ -256,8 +256,8 @@ trace_rbug_texture_read(struct trace_rbug *tr_rbug, struct rbug_header *header,
                                     t->stride,
                                     NULL);
     
    -   screen->transfer_unmap(screen, t);
    -   screen->tex_transfer_destroy(t);
    +   context->transfer_unmap(context, t);
    +   context->tex_transfer_destroy(context, t);
     
        pipe_mutex_unlock(tr_scr->list_mutex);
     
    diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
    index 86ddb995405..25990bdac7f 100644
    --- a/src/gallium/drivers/trace/tr_screen.c
    +++ b/src/gallium/drivers/trace/tr_screen.c
    @@ -353,134 +353,8 @@ trace_screen_tex_surface_destroy(struct pipe_surface *_surface)
     }
     
     
    -/********************************************************************
    - * transfer
    - */
     
     
    -static struct pipe_transfer *
    -trace_screen_get_tex_transfer(struct pipe_screen *_screen,
    -                              struct pipe_texture *_texture,
    -                              unsigned face, unsigned level,
    -                              unsigned zslice,
    -                              enum pipe_transfer_usage usage,
    -                              unsigned x, unsigned y, unsigned w, unsigned h)
    -{
    -   struct trace_screen *tr_scr = trace_screen(_screen);
    -   struct trace_texture *tr_tex = trace_texture(_texture);
    -   struct pipe_screen *screen = tr_scr->screen;
    -   struct pipe_texture *texture = tr_tex->texture;
    -   struct pipe_transfer *result = NULL;
    -
    -   assert(texture->screen == screen);
    -
    -   trace_dump_call_begin("pipe_screen", "get_tex_transfer");
    -
    -   trace_dump_arg(ptr, screen);
    -   trace_dump_arg(ptr, texture);
    -   trace_dump_arg(uint, face);
    -   trace_dump_arg(uint, level);
    -   trace_dump_arg(uint, zslice);
    -   trace_dump_arg(uint, usage);
    -
    -   trace_dump_arg(uint, x);
    -   trace_dump_arg(uint, y);
    -   trace_dump_arg(uint, w);
    -   trace_dump_arg(uint, h);
    -
    -   result = screen->get_tex_transfer(screen, texture, face, level, zslice, usage,
    -                                     x, y, w, h);
    -
    -   trace_dump_ret(ptr, result);
    -
    -   trace_dump_call_end();
    -
    -   if (result)
    -      result = trace_transfer_create(tr_tex, result);
    -
    -   return result;
    -}
    -
    -
    -static void
    -trace_screen_tex_transfer_destroy(struct pipe_transfer *_transfer)
    -{
    -   struct trace_screen *tr_scr = trace_screen(_transfer->texture->screen);
    -   struct trace_transfer *tr_trans = trace_transfer(_transfer);
    -   struct pipe_screen *screen = tr_scr->screen;
    -   struct pipe_transfer *transfer = tr_trans->transfer;
    -
    -   trace_dump_call_begin("pipe_screen", "tex_transfer_destroy");
    -
    -   trace_dump_arg(ptr, screen);
    -   trace_dump_arg(ptr, transfer);
    -
    -   trace_dump_call_end();
    -
    -   trace_transfer_destroy(tr_trans);
    -}
    -
    -
    -static void *
    -trace_screen_transfer_map(struct pipe_screen *_screen,
    -                          struct pipe_transfer *_transfer)
    -{
    -   struct trace_screen *tr_scr = trace_screen(_screen);
    -   struct trace_transfer *tr_trans = trace_transfer(_transfer);
    -   struct pipe_screen *screen = tr_scr->screen;
    -   struct pipe_transfer *transfer = tr_trans->transfer;
    -   void *map;
    -
    -   map = screen->transfer_map(screen, transfer);
    -   if(map) {
    -      if(transfer->usage & PIPE_TRANSFER_WRITE) {
    -         assert(!tr_trans->map);
    -         tr_trans->map = map;
    -      }
    -   }
    -
    -   return map;
    -}
    -
    -
    -static void
    -trace_screen_transfer_unmap(struct pipe_screen *_screen,
    -                           struct pipe_transfer *_transfer)
    -{
    -   struct trace_screen *tr_scr = trace_screen(_screen);
    -   struct trace_transfer *tr_trans = trace_transfer(_transfer);
    -   struct pipe_screen *screen = tr_scr->screen;
    -   struct pipe_transfer *transfer = tr_trans->transfer;
    -
    -   if(tr_trans->map) {
    -      size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->height) * transfer->stride;
    -
    -      trace_dump_call_begin("pipe_screen", "transfer_write");
    -
    -      trace_dump_arg(ptr, screen);
    -
    -      trace_dump_arg(ptr, transfer);
    -
    -      trace_dump_arg_begin("stride");
    -      trace_dump_uint(transfer->stride);
    -      trace_dump_arg_end();
    -
    -      trace_dump_arg_begin("data");
    -      trace_dump_bytes(tr_trans->map, size);
    -      trace_dump_arg_end();
    -
    -      trace_dump_arg_begin("size");
    -      trace_dump_uint(size);
    -      trace_dump_arg_end();
    -
    -      trace_dump_call_end();
    -
    -      tr_trans->map = NULL;
    -   }
    -
    -   screen->transfer_unmap(screen, transfer);
    -}
    -
     
     /********************************************************************
      * buffer
    @@ -901,10 +775,6 @@ trace_screen_create(struct pipe_screen *screen)
        tr_scr->base.texture_destroy = trace_screen_texture_destroy;
        tr_scr->base.get_tex_surface = trace_screen_get_tex_surface;
        tr_scr->base.tex_surface_destroy = trace_screen_tex_surface_destroy;
    -   tr_scr->base.get_tex_transfer = trace_screen_get_tex_transfer;
    -   tr_scr->base.tex_transfer_destroy = trace_screen_tex_transfer_destroy;
    -   tr_scr->base.transfer_map = trace_screen_transfer_map;
    -   tr_scr->base.transfer_unmap = trace_screen_transfer_unmap;
        tr_scr->base.buffer_create = trace_screen_buffer_create;
        tr_scr->base.user_buffer_create = trace_screen_user_buffer_create;
        if (screen->buffer_map)
    @@ -920,7 +790,11 @@ trace_screen_create(struct pipe_screen *screen)
        tr_scr->base.fence_signalled = trace_screen_fence_signalled;
        tr_scr->base.fence_finish = trace_screen_fence_finish;
        tr_scr->base.flush_frontbuffer = trace_screen_flush_frontbuffer;
    +
        tr_scr->screen = screen;
    +   tr_scr->private_context = screen->context_create(screen, NULL);
    +   if (tr_scr->private_context == NULL)
    +      goto error3;
     
        trace_dump_ret(ptr, screen);
        trace_dump_call_end();
    @@ -930,10 +804,8 @@ trace_screen_create(struct pipe_screen *screen)
     
        return &tr_scr->base;
     
    -#if 0
     error3:
        FREE(tr_scr);
    -#endif
     error2:
        trace_dump_ret(ptr, screen);
        trace_dump_call_end();
    diff --git a/src/gallium/drivers/trace/tr_screen.h b/src/gallium/drivers/trace/tr_screen.h
    index 597e2fc2650..9bfbe72e2c0 100644
    --- a/src/gallium/drivers/trace/tr_screen.h
    +++ b/src/gallium/drivers/trace/tr_screen.h
    @@ -56,6 +56,7 @@ struct trace_screen
        struct pipe_screen base;
     
        struct pipe_screen *screen;
    +   struct pipe_context *private_context;
     
        /* remote debugger */
        struct trace_rbug *rbug;
    diff --git a/src/gallium/drivers/trace/tr_texture.c b/src/gallium/drivers/trace/tr_texture.c
    index 5321d68ec0c..d818e21bb82 100644
    --- a/src/gallium/drivers/trace/tr_texture.c
    +++ b/src/gallium/drivers/trace/tr_texture.c
    @@ -31,6 +31,7 @@
     #include "util/u_simple_list.h"
     
     #include "tr_screen.h"
    +#include "tr_context.h"
     #include "tr_texture.h"
     
     
    @@ -124,8 +125,9 @@ trace_surface_destroy(struct trace_surface *tr_surf)
     
     
     struct pipe_transfer *
    -trace_transfer_create(struct trace_texture *tr_tex,
    -                     struct pipe_transfer *transfer)
    +trace_transfer_create(struct trace_context *tr_ctx,
    +		      struct trace_texture *tr_tex,
    +		      struct pipe_transfer *transfer)
     {
        struct trace_screen *tr_scr = trace_screen(tr_tex->base.screen);
        struct trace_transfer *tr_trans;
    @@ -142,8 +144,9 @@ trace_transfer_create(struct trace_texture *tr_tex,
        memcpy(&tr_trans->base, transfer, sizeof(struct pipe_transfer));
     
        tr_trans->base.texture = NULL;
    -   pipe_texture_reference(&tr_trans->base.texture, &tr_tex->base);
        tr_trans->transfer = transfer;
    +
    +   pipe_texture_reference(&tr_trans->base.texture, &tr_tex->base);
        assert(tr_trans->base.texture == &tr_tex->base);
     
        trace_screen_add_to_list(tr_scr, transfers, tr_trans);
    @@ -151,21 +154,23 @@ trace_transfer_create(struct trace_texture *tr_tex,
        return &tr_trans->base;
     
     error:
    -   transfer->texture->screen->tex_transfer_destroy(transfer);
    +   tr_ctx->pipe->tex_transfer_destroy(tr_ctx->pipe, transfer);
        return NULL;
     }
     
     
     void
    -trace_transfer_destroy(struct trace_transfer *tr_trans)
    +trace_transfer_destroy(struct trace_context *tr_context,
    +                       struct trace_transfer *tr_trans)
     {
    -   struct trace_screen *tr_scr = trace_screen(tr_trans->base.texture->screen);
    -   struct pipe_screen *screen = tr_trans->transfer->texture->screen;
    +   struct trace_screen *tr_scr = trace_screen(tr_context->base.screen);
    +   struct pipe_context *context = tr_context->pipe;
    +   struct pipe_transfer *transfer = tr_trans->transfer;
     
        trace_screen_remove_from_list(tr_scr, transfers, tr_trans);
     
        pipe_texture_reference(&tr_trans->base.texture, NULL);
    -   screen->tex_transfer_destroy(tr_trans->transfer);
    +   context->tex_transfer_destroy(context, transfer);
        FREE(tr_trans);
     }
     
    diff --git a/src/gallium/drivers/trace/tr_texture.h b/src/gallium/drivers/trace/tr_texture.h
    index 3a99dcdaece..66250465e44 100644
    --- a/src/gallium/drivers/trace/tr_texture.h
    +++ b/src/gallium/drivers/trace/tr_texture.h
    @@ -34,6 +34,7 @@
     
     #include "tr_screen.h"
     
    +struct trace_context;
     
     struct trace_texture
     {
    @@ -68,6 +69,7 @@ struct trace_transfer
        struct pipe_transfer base;
     
        struct pipe_transfer *transfer;
    +   struct pipe_context *pipe;
     
        struct tr_list list;
     
    @@ -129,11 +131,13 @@ void
     trace_surface_destroy(struct trace_surface *tr_surf);
     
     struct pipe_transfer *
    -trace_transfer_create(struct trace_texture *tr_tex,
    -                      struct pipe_transfer *transfer);
    +trace_transfer_create(struct trace_context *tr_ctx,
    +		      struct trace_texture *tr_tex,
    +		      struct pipe_transfer *transfer);
     
     void
    -trace_transfer_destroy(struct trace_transfer *tr_trans);
    +trace_transfer_destroy(struct trace_context *tr_ctx,
    +                       struct trace_transfer *tr_trans);
     
     
     #endif /* TR_TEXTURE_H_ */
    diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
    index 17fad1fa243..d1b734a9f9a 100644
    --- a/src/gallium/include/pipe/p_context.h
    +++ b/src/gallium/include/pipe/p_context.h
    @@ -316,6 +316,32 @@ struct pipe_context {
     
        void (*sampler_view_destroy)(struct pipe_context *ctx,
                                     struct pipe_sampler_view *view);
    +
    +
    +   /**
    +    * Get a transfer object for transferring data to/from a texture.
    +    *
    +    * Transfers are (by default) context-private and allow uploads to be
    +    * interleaved with rendering.
    +    */
    +   struct pipe_transfer *(*get_tex_transfer)(struct pipe_context *,
    +                                             struct pipe_texture *texture,
    +                                             unsigned face, unsigned level,
    +                                             unsigned zslice,
    +                                             enum pipe_transfer_usage usage,
    +                                             unsigned x, unsigned y,
    +                                             unsigned w, unsigned h);
    +
    +   void (*tex_transfer_destroy)(struct pipe_context *,
    +                                struct pipe_transfer *);
    +   
    +   void *(*transfer_map)( struct pipe_context *,
    +                          struct pipe_transfer *transfer );
    +
    +   void (*transfer_unmap)( struct pipe_context *,
    +                           struct pipe_transfer *transfer );
    +
    +
     };
     
     
    diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h
    index 690455f7222..b7cb83abbe5 100644
    --- a/src/gallium/include/pipe/p_screen.h
    +++ b/src/gallium/include/pipe/p_screen.h
    @@ -142,23 +142,6 @@ struct pipe_screen {
        void (*tex_surface_destroy)(struct pipe_surface *);
        
     
    -   /** Get a transfer object for transferring data to/from a texture */
    -   struct pipe_transfer *(*get_tex_transfer)(struct pipe_screen *,
    -                                             struct pipe_texture *texture,
    -                                             unsigned face, unsigned level,
    -                                             unsigned zslice,
    -                                             enum pipe_transfer_usage usage,
    -                                             unsigned x, unsigned y,
    -                                             unsigned w, unsigned h);
    -
    -   void (*tex_transfer_destroy)(struct pipe_transfer *);
    -   
    -   void *(*transfer_map)( struct pipe_screen *,
    -                          struct pipe_transfer *transfer );
    -
    -   void (*transfer_unmap)( struct pipe_screen *,
    -                           struct pipe_transfer *transfer );
    -
     
        /**
         * Create a new buffer.
    @@ -265,6 +248,7 @@ struct pipe_screen {
     
        /**
         * Do any special operations to ensure buffer size is correct
    +    * \param context_private  the private data of the calling context
         */
        void (*update_buffer)( struct pipe_screen *ws,
                               void *context_private );
    @@ -272,10 +256,12 @@ struct pipe_screen {
        /**
         * Do any special operations to ensure frontbuffer contents are
         * displayed, eg copy fake frontbuffer.
    +    * \param winsys_drawable_handle  an opaque handle that the calling context
    +    *                                gets out-of-band
         */
        void (*flush_frontbuffer)( struct pipe_screen *screen,
                                   struct pipe_surface *surf,
    -                              void *context_private );
    +                              void *winsys_drawable_handle );
     
     
     
    diff --git a/src/gallium/state_trackers/dri/dri_extensions.c b/src/gallium/state_trackers/dri/dri_extensions.c
    index 1259813a412..800677a2d1e 100644
    --- a/src/gallium/state_trackers/dri/dri_extensions.c
    +++ b/src/gallium/state_trackers/dri/dri_extensions.c
    @@ -33,110 +33,14 @@
     #include "dri_context.h"
     #include "state_tracker/st_context.h"
     
    -#define need_GL_ARB_map_buffer_range
    -#define need_GL_ARB_multisample
    -#define need_GL_ARB_occlusion_query
    -#define need_GL_ARB_point_parameters
    -#define need_GL_ARB_provoking_vertex
    -#define need_GL_ARB_shader_objects
    -#define need_GL_ARB_texture_compression
    -#define need_GL_ARB_vertex_array_object
    -#define need_GL_ARB_vertex_buffer_object
    -#define need_GL_ARB_vertex_program
    -#define need_GL_ARB_vertex_shader
    -#define need_GL_ARB_window_pos
    -#define need_GL_EXT_blend_color
    -#define need_GL_EXT_blend_equation_separate
    -#define need_GL_EXT_blend_func_separate
    -#define need_GL_EXT_blend_minmax
    -#define need_GL_EXT_cull_vertex
    -#define need_GL_EXT_draw_buffers2
    -#define need_GL_EXT_fog_coord
    -#define need_GL_EXT_framebuffer_object
    -#define need_GL_EXT_multi_draw_arrays
    -#define need_GL_EXT_provoking_vertex
    -#define need_GL_EXT_secondary_color
    -#define need_GL_EXT_stencil_two_side
    -#define need_GL_APPLE_vertex_array_object
    -#define need_GL_NV_vertex_program
    -#define need_GL_VERSION_2_0
    -#define need_GL_VERSION_2_1
    -#include "main/remap_helper.h"
     #include "utils.h"
     
    -/**
    - * Extension strings exported by the driver.
    - */
    -static const struct dri_extension card_extensions[] = {
    -   {"GL_ARB_fragment_shader", NULL},
    -   {"GL_ARB_map_buffer_range", GL_ARB_map_buffer_range_functions},
    -   {"GL_ARB_multisample", GL_ARB_multisample_functions},
    -   {"GL_ARB_multitexture", NULL},
    -   {"GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions},
    -   {"GL_ARB_pixel_buffer_object", NULL},
    -   {"GL_ARB_provoking_vertex", GL_ARB_provoking_vertex_functions},
    -   {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions},
    -   {"GL_ARB_shading_language_100", GL_VERSION_2_0_functions },
    -   {"GL_ARB_shading_language_120", GL_VERSION_2_1_functions },
    -   {"GL_ARB_shader_objects", GL_ARB_shader_objects_functions},
    -   {"GL_ARB_texture_border_clamp", NULL},
    -   {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions},
    -   {"GL_ARB_texture_cube_map", NULL},
    -   {"GL_ARB_texture_env_add", NULL},
    -   {"GL_ARB_texture_env_combine", NULL},
    -   {"GL_ARB_texture_env_dot3", NULL},
    -   {"GL_ARB_texture_mirrored_repeat", NULL},
    -   {"GL_ARB_texture_non_power_of_two", NULL},
    -   {"GL_ARB_texture_rectangle", NULL},
    -   {"GL_ARB_vertex_array_object", GL_ARB_vertex_array_object_functions},
    -   {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions},
    -   {"GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions},
    -   {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions},
    -   {"GL_ARB_window_pos", GL_ARB_window_pos_functions},
    -   {"GL_EXT_blend_color", GL_EXT_blend_color_functions},
    -   {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions},
    -   {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions},
    -   {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions},
    -   {"GL_EXT_blend_subtract", NULL},
    -   {"GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions},
    -   {"GL_EXT_draw_buffers2", GL_EXT_draw_buffers2_functions},
    -   {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions},
    -   {"GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions},
    -   {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions},
    -   {"GL_EXT_packed_depth_stencil", NULL},
    -   {"GL_EXT_pixel_buffer_object", NULL},
    -   {"GL_EXT_provoking_vertex", GL_EXT_provoking_vertex_functions},
    -   {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions},
    -   {"GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions},
    -   {"GL_EXT_stencil_wrap", NULL},
    -   {"GL_EXT_texture_edge_clamp", NULL},
    -   {"GL_EXT_texture_env_combine", NULL},
    -   {"GL_EXT_texture_env_dot3", NULL},
    -   {"GL_EXT_texture_filter_anisotropic", NULL},
    -   {"GL_EXT_texture_lod_bias", NULL},
    -   {"GL_3DFX_texture_compression_FXT1", NULL},
    -   {"GL_APPLE_client_storage", NULL},
    -   {"GL_APPLE_vertex_array_object", GL_APPLE_vertex_array_object_functions},
    -   {"GL_MESA_pack_invert", NULL},
    -   {"GL_MESA_ycbcr_texture", NULL},
    -   {"GL_NV_blend_square", NULL},
    -   {"GL_NV_vertex_program", GL_NV_vertex_program_functions},
    -   {"GL_NV_vertex_program1_1", NULL},
    -   {"GL_SGIS_generate_mipmap", NULL},
    -   {NULL, NULL}
    -};
    -
     void
     dri_init_extensions(struct dri_context *ctx)
     {
    -   /* The card_extensions list should be pruned according to the
    -    * capabilities of the pipe_screen. This is actually something
    -    * that can/should be done inside st_create_context().
    -    * XXX Not pruning is very bogus. Always all these extensions above
    -    * will be advertized, regardless what st_init_extensions
    -    * (which depends on the pipe cap bits) does.
    -    */
    -   driInitExtensions(ctx->st->ctx, card_extensions, GL_TRUE);
    +   /* New extensions should be added in mesa/state_tracker/st_extensions.c
    +    * and not in this file. */
    +   driInitExtensions(ctx->st->ctx, NULL, GL_FALSE);
     }
     
     /* vim: set sw=3 ts=8 sts=3 expandtab: */
    diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c
    index 60bc560049c..7ccad8f5dd6 100644
    --- a/src/gallium/state_trackers/dri/dri_screen.c
    +++ b/src/gallium/state_trackers/dri/dri_screen.c
    @@ -90,6 +90,9 @@ dri_fill_in_modes(struct dri_screen *screen,
     		  unsigned pixel_bits)
     {
        __DRIconfig **configs = NULL;
    +   __DRIconfig **configs_r5g6b5 = NULL;
    +   __DRIconfig **configs_a8r8g8b8 = NULL;
    +   __DRIconfig **configs_x8r8g8b8 = NULL;
        unsigned num_modes;
        uint8_t depth_bits_array[5];
        uint8_t stencil_bits_array[5];
    @@ -127,25 +130,23 @@ dri_fill_in_modes(struct dri_screen *screen,
        pf_x8r8g8b8 = p_screen->is_format_supported(p_screen, PIPE_FORMAT_B8G8R8X8_UNORM,
     					       PIPE_TEXTURE_2D,
     					       PIPE_TEXTURE_USAGE_RENDER_TARGET, 0);
    +   pf_r5g6b5 = p_screen->is_format_supported(p_screen, PIPE_FORMAT_B5G6R5_UNORM,
    +					     PIPE_TEXTURE_2D,
    +					     PIPE_TEXTURE_USAGE_RENDER_TARGET, 0);
     
    -   /* we support buffers with different depths only if we can tell the driver
    -    * the actual depth of each of them. */
    -   if (screen->sPriv->dri2.loader
    -       && (screen->sPriv->dri2.loader->base.version > 2)
    -       && (screen->sPriv->dri2.loader->getBuffersWithFormat != NULL)) {
    +   /* We can only get a 16 or 32 bit depth buffer with getBuffersWithFormat */
    +   if (screen->sPriv->dri2.loader &&
    +       (screen->sPriv->dri2.loader->base.version > 2) &&
    +       (screen->sPriv->dri2.loader->getBuffersWithFormat != NULL)) {
           pf_z16 = p_screen->is_format_supported(p_screen, PIPE_FORMAT_Z16_UNORM,
                                                  PIPE_TEXTURE_2D,
                                                  PIPE_TEXTURE_USAGE_DEPTH_STENCIL, 0);
           pf_z32 = p_screen->is_format_supported(p_screen, PIPE_FORMAT_Z32_UNORM,
                                                  PIPE_TEXTURE_2D,
                                                  PIPE_TEXTURE_USAGE_DEPTH_STENCIL, 0);
    -      pf_r5g6b5 = p_screen->is_format_supported(p_screen, PIPE_FORMAT_B5G6R5_UNORM,
    -                                                PIPE_TEXTURE_2D,
    -                                                PIPE_TEXTURE_USAGE_RENDER_TARGET, 0);
        } else {
           pf_z16 = FALSE;
           pf_z32 = FALSE;
    -      pf_r5g6b5 = FALSE;
        }
     
        if (pf_z16) {
    @@ -175,46 +176,48 @@ dri_fill_in_modes(struct dri_screen *screen,
        num_modes =
           depth_buffer_factor * back_buffer_factor * msaa_samples_factor * 4;
     
    -   if (pixel_bits == 16 && pf_r5g6b5) {
    -      configs = driCreateConfigs(GL_RGB, GL_UNSIGNED_SHORT_5_6_5,
    -				 depth_bits_array, stencil_bits_array,
    -				 depth_buffer_factor, back_buffer_modes,
    -				 back_buffer_factor,
    -				 msaa_samples_array, msaa_samples_factor,
    -				 GL_TRUE);
    +   if (pf_r5g6b5)
    +      configs_r5g6b5 = driCreateConfigs(GL_RGB, GL_UNSIGNED_SHORT_5_6_5,
    +                                        depth_bits_array, stencil_bits_array,
    +                                        depth_buffer_factor, back_buffer_modes,
    +                                        back_buffer_factor,
    +                                        msaa_samples_array, msaa_samples_factor,
    +                                        GL_TRUE);
    +
    +   if (pf_a8r8g8b8)
    +      configs_a8r8g8b8 = driCreateConfigs(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
    +                                          depth_bits_array,
    +                                          stencil_bits_array,
    +                                          depth_buffer_factor,
    +                                          back_buffer_modes,
    +                                          back_buffer_factor,
    +                                          msaa_samples_array,
    +                                          msaa_samples_factor,
    +                                          GL_TRUE);
    +
    +   if (pf_x8r8g8b8)
    +      configs_x8r8g8b8 = driCreateConfigs(GL_BGR, GL_UNSIGNED_INT_8_8_8_8_REV,
    +                                          depth_bits_array,
    +                                          stencil_bits_array,
    +                                          depth_buffer_factor,
    +                                          back_buffer_modes,
    +                                          back_buffer_factor,
    +                                          msaa_samples_array,
    +                                          msaa_samples_factor,
    +                                          GL_TRUE);
    +
    +   if (pixel_bits == 16) {
    +      configs = configs_r5g6b5;
    +      if (configs_a8r8g8b8)
    +         configs = configs ? driConcatConfigs(configs, configs_a8r8g8b8) : configs_a8r8g8b8;
    +      if (configs_x8r8g8b8)
    +	 configs = configs ? driConcatConfigs(configs, configs_x8r8g8b8) : configs_x8r8g8b8;
        } else {
    -      __DRIconfig **configs_a8r8g8b8 = NULL;
    -      __DRIconfig **configs_x8r8g8b8 = NULL;
    -
    -      if (pf_a8r8g8b8)
    -	 configs_a8r8g8b8 = driCreateConfigs(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
    -					     depth_bits_array,
    -					     stencil_bits_array,
    -					     depth_buffer_factor,
    -					     back_buffer_modes,
    -					     back_buffer_factor,
    -					     msaa_samples_array,
    -                                             msaa_samples_factor,
    -					     GL_TRUE);
    -      if (pf_x8r8g8b8)
    -	 configs_x8r8g8b8 = driCreateConfigs(GL_BGR, GL_UNSIGNED_INT_8_8_8_8_REV,
    -					     depth_bits_array,
    -					     stencil_bits_array,
    -					     depth_buffer_factor,
    -					     back_buffer_modes,
    -					     back_buffer_factor,
    -					     msaa_samples_array,
    -                                             msaa_samples_factor,
    -					     GL_TRUE);
    -
    -      if (configs_a8r8g8b8 && configs_x8r8g8b8)
    -	 configs = driConcatConfigs(configs_x8r8g8b8, configs_a8r8g8b8);
    -      else if (configs_a8r8g8b8)
    -	 configs = configs_a8r8g8b8;
    -      else if (configs_x8r8g8b8)
    -	 configs = configs_x8r8g8b8;
    -      else
    -	 configs = NULL;
    +      configs = configs_a8r8g8b8;
    +      if (configs_x8r8g8b8)
    +	 configs = configs ? driConcatConfigs(configs, configs_x8r8g8b8) : configs_x8r8g8b8;
    +      if (configs_r5g6b5)
    +         configs = configs ? driConcatConfigs(configs, configs_r5g6b5) : configs_r5g6b5;
        }
     
        if (configs == NULL) {
    diff --git a/src/gallium/state_trackers/egl/x11/native_ximage.c b/src/gallium/state_trackers/egl/x11/native_ximage.c
    index e0d12acabe8..c6b16354f9b 100644
    --- a/src/gallium/state_trackers/egl/x11/native_ximage.c
    +++ b/src/gallium/state_trackers/egl/x11/native_ximage.c
    @@ -82,6 +82,8 @@ struct ximage_surface {
        int width, height;
        struct ximage_buffer buffers[NUM_NATIVE_ATTACHMENTS];
        uint valid_mask;
    +
    +   struct pipe_surface *draw_surface;
     };
     
     struct ximage_config {
    @@ -266,16 +268,20 @@ ximage_surface_draw_buffer(struct native_surface *nsurf,
     
        assert(xsurf->drawable && xbuf->texture);
     
    -   /* what's the cost of surface creation? */
    -   psurf = screen->get_tex_surface(screen,
    -         xbuf->texture, 0, 0, 0, PIPE_BUFFER_USAGE_CPU_READ);
    -   if (!psurf)
    -      return FALSE;
    +   psurf = xsurf->draw_surface;
    +   if (!psurf || psurf->texture != xbuf->texture) {
    +      pipe_surface_reference(&xsurf->draw_surface, NULL);
    +
    +      psurf = screen->get_tex_surface(screen,
    +            xbuf->texture, 0, 0, 0, PIPE_BUFFER_USAGE_CPU_READ);
    +      if (!psurf)
    +         return FALSE;
    +
    +      xsurf->draw_surface = psurf;
    +   }
     
        screen->flush_frontbuffer(screen, psurf, &xbuf->xdraw);
     
    -   pipe_surface_reference(&psurf, NULL);
    -
        return TRUE;
     }
     
    @@ -371,6 +377,8 @@ ximage_surface_destroy(struct native_surface *nsurf)
        struct ximage_surface *xsurf = ximage_surface(nsurf);
        int i;
     
    +   pipe_surface_reference(&xsurf->draw_surface, NULL);
    +
        for (i = 0; i < NUM_NATIVE_ATTACHMENTS; i++)
           ximage_surface_free_buffer(&xsurf->base, i);
     
    diff --git a/src/gallium/state_trackers/python/SConscript b/src/gallium/state_trackers/python/SConscript
    index 781f54bf2b9..d0d141fd249 100644
    --- a/src/gallium/state_trackers/python/SConscript
    +++ b/src/gallium/state_trackers/python/SConscript
    @@ -3,7 +3,8 @@ import os.path
     
     Import('*')
     
    -if 'python' in env['statetrackers']:
    +if 'python' in env['statetrackers'] and 0:
    +    # FIXME: python state tracker is disabled until transfers are done by contexts
     
         env = env.Clone()
         
    diff --git a/src/gallium/state_trackers/vega/api_filters.c b/src/gallium/state_trackers/vega/api_filters.c
    index 02248ad4337..18e2cc1f250 100644
    --- a/src/gallium/state_trackers/vega/api_filters.c
    +++ b/src/gallium/state_trackers/vega/api_filters.c
    @@ -78,14 +78,14 @@ static INLINE struct pipe_texture *create_texture_1d(struct vg_context *ctx,
     
        { /* upload color_data */
           struct pipe_transfer *transfer =
    -         screen->get_tex_transfer(screen, tex,
    -                                  0, 0, 0,
    -                                  PIPE_TRANSFER_READ_WRITE ,
    -                                  0, 0, tex->width0, tex->height0);
    -      void *map = screen->transfer_map(screen, transfer);
    +         pipe->get_tex_transfer(pipe, tex,
    +				0, 0, 0,
    +				PIPE_TRANSFER_READ_WRITE ,
    +				0, 0, tex->width0, tex->height0);
    +      void *map = pipe->transfer_map(pipe, transfer);
           memcpy(map, color_data, sizeof(VGint)*color_data_len);
    -      screen->transfer_unmap(screen, transfer);
    -      screen->tex_transfer_destroy(transfer);
    +      pipe->transfer_unmap(pipe, transfer);
    +      pipe->tex_transfer_destroy(pipe, transfer);
        }
     
        return tex;
    diff --git a/src/gallium/state_trackers/vega/api_images.c b/src/gallium/state_trackers/vega/api_images.c
    index 015241498ed..fec473d9d23 100644
    --- a/src/gallium/state_trackers/vega/api_images.c
    +++ b/src/gallium/state_trackers/vega/api_images.c
    @@ -397,7 +397,6 @@ void vgReadPixels(void * data, VGint dataStride,
     {
        struct vg_context *ctx = vg_current_context();
        struct pipe_context *pipe = ctx->pipe;
    -   struct pipe_screen *screen = pipe->screen;
     
        struct st_framebuffer *stfb = ctx->draw_buffer;
        struct st_renderbuffer *strb = stfb->strb;
    @@ -442,7 +441,7 @@ void vgReadPixels(void * data, VGint dataStride,
        {
           struct pipe_transfer *transfer;
     
    -      transfer = screen->get_tex_transfer(screen, strb->texture,  0, 0, 0,
    +      transfer = pipe->get_tex_transfer(pipe, strb->texture,  0, 0, 0,
                                               PIPE_TRANSFER_READ,
                                               0, 0, width, height);
     
    @@ -451,14 +450,14 @@ void vgReadPixels(void * data, VGint dataStride,
     #if 0
              debug_printf("%d-%d  == %d\n", sy, height, y);
     #endif
    -         pipe_get_tile_rgba(transfer, sx, y, width, 1, df);
    +         pipe_get_tile_rgba(pipe, transfer, sx, y, width, 1, df);
              y += yStep;
              _vega_pack_rgba_span_float(ctx, width, temp, dataFormat,
                                         dst + yoffset + xoffset);
              dst += dataStride;
           }
     
    -      screen->tex_transfer_destroy(transfer);
    +      pipe->tex_transfer_destroy(pipe, transfer);
        }
     }
     
    diff --git a/src/gallium/state_trackers/vega/image.c b/src/gallium/state_trackers/vega/image.c
    index 41c979bfecf..a71579cd264 100644
    --- a/src/gallium/state_trackers/vega/image.c
    +++ b/src/gallium/state_trackers/vega/image.c
    @@ -378,7 +378,7 @@ void image_sub_data(struct vg_image *image,
        VGfloat *df = (VGfloat*)temp;
        VGint i;
        struct vg_context *ctx = vg_current_context();
    -   struct pipe_screen *screen = ctx->pipe->screen;
    +   struct pipe_context *pipe = ctx->pipe;
        struct pipe_texture *texture = image_texture(image);
        VGint xoffset = 0, yoffset = 0;
     
    @@ -412,17 +412,17 @@ void image_sub_data(struct vg_image *image,
        }
     
        { /* upload color_data */
    -      struct pipe_transfer *transfer = screen->get_tex_transfer(
    -         screen, texture, 0, 0, 0,
    +      struct pipe_transfer *transfer = pipe->get_tex_transfer(
    +         pipe, texture, 0, 0, 0,
              PIPE_TRANSFER_WRITE, 0, 0, texture->width0, texture->height0);
           src += (dataStride * yoffset);
           for (i = 0; i < height; i++) {
              _vega_unpack_float_span_rgba(ctx, width, xoffset, src, dataFormat, temp);
    -         pipe_put_tile_rgba(transfer, x+image->x, y+image->y, width, 1, df);
    +         pipe_put_tile_rgba(pipe, transfer, x+image->x, y+image->y, width, 1, df);
              y += yStep;
              src += dataStride;
           }
    -      screen->tex_transfer_destroy(transfer);
    +      pipe->tex_transfer_destroy(pipe, transfer);
        }
     }
     
    @@ -435,7 +435,6 @@ void image_get_sub_data(struct vg_image * image,
     {
        struct vg_context *ctx = vg_current_context();
        struct pipe_context *pipe = ctx->pipe;
    -   struct pipe_screen *screen = pipe->screen;
        VGfloat temp[VEGA_MAX_IMAGE_WIDTH][4];
        VGfloat *df = (VGfloat*)temp;
        VGint y = 0, yStep = 1;
    @@ -444,7 +443,7 @@ void image_get_sub_data(struct vg_image * image,
     
        {
           struct pipe_transfer *transfer =
    -         screen->get_tex_transfer(screen,
    +         pipe->get_tex_transfer(pipe,
                                       image->texture,  0, 0, 0,
                                       PIPE_TRANSFER_READ,
                                       0, 0,
    @@ -455,13 +454,13 @@ void image_get_sub_data(struct vg_image * image,
     #if 0
              debug_printf("%d-%d  == %d\n", sy, height, y);
     #endif
    -         pipe_get_tile_rgba(transfer, sx+image->x, y, width, 1, df);
    +         pipe_get_tile_rgba(pipe, transfer, sx+image->x, y, width, 1, df);
              y += yStep;
              _vega_pack_rgba_span_float(ctx, width, temp, dataFormat, dst);
              dst += dataStride;
           }
     
    -      screen->tex_transfer_destroy(transfer);
    +      pipe->tex_transfer_destroy(pipe, transfer);
        }
     }
     
    diff --git a/src/gallium/state_trackers/vega/paint.c b/src/gallium/state_trackers/vega/paint.c
    index cdb87d3bf68..dc56b8c5f3b 100644
    --- a/src/gallium/state_trackers/vega/paint.c
    +++ b/src/gallium/state_trackers/vega/paint.c
    @@ -164,10 +164,10 @@ static INLINE struct pipe_texture *create_gradient_texture(struct vg_paint *p)
           struct pipe_transfer *transfer =
              st_no_flush_get_tex_transfer(p->base.ctx, tex, 0, 0, 0,
                                           PIPE_TRANSFER_WRITE, 0, 0, 1024, 1);
    -      void *map = screen->transfer_map(screen, transfer);
    +      void *map = pipe->transfer_map(pipe, transfer);
           memcpy(map, p->gradient.color_data, sizeof(VGint)*1024);
    -      screen->transfer_unmap(screen, transfer);
    -      screen->tex_transfer_destroy(transfer);
    +      pipe->transfer_unmap(pipe, transfer);
    +      pipe->tex_transfer_destroy(pipe, transfer);
        }
     
        return tex;
    diff --git a/src/gallium/state_trackers/vega/st_inlines.h b/src/gallium/state_trackers/vega/st_inlines.h
    index 419151c3aee..4d12a4efdd6 100644
    --- a/src/gallium/state_trackers/vega/st_inlines.h
    +++ b/src/gallium/state_trackers/vega/st_inlines.h
    @@ -51,7 +51,6 @@ st_cond_flush_get_tex_transfer(struct vg_context *st,
     			       unsigned int x, unsigned int y,
     			       unsigned int w, unsigned int h)
     {
    -   struct pipe_screen *screen = st->pipe->screen;
        struct pipe_context *pipe = st->pipe;
        unsigned referenced =
           pipe->is_texture_referenced(pipe, pt, face, level);
    @@ -60,7 +59,7 @@ st_cond_flush_get_tex_transfer(struct vg_context *st,
     		      (usage & PIPE_TRANSFER_WRITE)))
           vgFlush();
     
    -   return screen->get_tex_transfer(screen, pt, face, level, zslice, usage,
    +   return pipe->get_tex_transfer(pipe, pt, face, level, zslice, usage,
     				   x, y, w, h);
     }
     
    @@ -74,10 +73,10 @@ st_no_flush_get_tex_transfer(struct vg_context *st,
     			     unsigned int x, unsigned int y,
     			     unsigned int w, unsigned int h)
     {
    -   struct pipe_screen *screen = st->pipe->screen;
    +   struct pipe_context *pipe = st->pipe;
     
    -   return screen->get_tex_transfer(screen, pt, face, level,
    -				   zslice, usage, x, y, w, h);
    +   return pipe->get_tex_transfer(pipe, pt, face, level,
    +				 zslice, usage, x, y, w, h);
     }
     
     static INLINE void *
    diff --git a/src/gallium/state_trackers/vega/vg_tracker.c b/src/gallium/state_trackers/vega/vg_tracker.c
    index a002e50faf7..ea5c2ce41f6 100644
    --- a/src/gallium/state_trackers/vega/vg_tracker.c
    +++ b/src/gallium/state_trackers/vega/vg_tracker.c
    @@ -380,16 +380,8 @@ boolean st_make_current(struct vg_context *st,
                             void *winsys_drawable_handle)
     {
        vg_set_current_context(st);
    -   if (st) {
    +   if (st)
           st->draw_buffer = draw;
    -
    -      /* VG state tracker doesn't seem to do front-buffer rendering
    -       * (no calls to flush_frontbuffer).  If it ever did start doing
    -       * that, it would need to pass this value down in the
    -       * flush_frontbuffer call:
    -       */
    -      st->pipe->priv = winsys_drawable_handle;
    -   }
        return VG_TRUE;
     }
     
    diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.h b/src/gallium/state_trackers/wgl/stw_framebuffer.h
    index 08cc4973bce..e61e9bf9c26 100644
    --- a/src/gallium/state_trackers/wgl/stw_framebuffer.h
    +++ b/src/gallium/state_trackers/wgl/stw_framebuffer.h
    @@ -45,7 +45,7 @@ struct stw_framebuffer
        /**
         * This mutex has two purposes:
         * - protect the access to the mutable data members below
    -    * - prevent the the framebuffer from being deleted while being accessed.
    +    * - prevent the framebuffer from being deleted while being accessed.
         * 
         * It is OK to lock this mutex while holding the stw_device::fb_mutex lock, 
         * but the opposite must never happen.
    diff --git a/src/gallium/state_trackers/xorg/xorg_composite.c b/src/gallium/state_trackers/xorg/xorg_composite.c
    index c50873c1508..715a5e7b943 100644
    --- a/src/gallium/state_trackers/xorg/xorg_composite.c
    +++ b/src/gallium/state_trackers/xorg/xorg_composite.c
    @@ -359,14 +359,6 @@ bind_samplers(struct exa_context *exa, int op,
     
        exa->num_bound_samplers = 0;
     
    -#if 0
    -   if ((pSrc && (exa->pipe->is_texture_referenced(exa->pipe, pSrc->tex, 0, 0) &
    -                 PIPE_REFERENCED_FOR_WRITE)) ||
    -       (pMask && (exa->pipe->is_texture_referenced(exa->pipe, pMask->tex, 0, 0) &
    -        PIPE_REFERENCED_FOR_WRITE)))
    -      xorg_exa_flush(exa, PIPE_FLUSH_RENDER_CACHE, NULL);
    -#endif
    -
        memset(&src_sampler, 0, sizeof(struct pipe_sampler_state));
        memset(&mask_sampler, 0, sizeof(struct pipe_sampler_state));
     
    diff --git a/src/gallium/state_trackers/xorg/xorg_crtc.c b/src/gallium/state_trackers/xorg/xorg_crtc.c
    index 44f7da0f960..eef428232b0 100644
    --- a/src/gallium/state_trackers/xorg/xorg_crtc.c
    +++ b/src/gallium/state_trackers/xorg/xorg_crtc.c
    @@ -39,6 +39,7 @@
     #include <xf86.h>
     #include <xf86i2c.h>
     #include <xf86Crtc.h>
    +#include <cursorstr.h>
     #include "xorg_tracker.h"
     #include "xf86Modes.h"
     
    @@ -219,16 +220,16 @@ crtc_load_cursor_argb_ga3d(xf86CrtcPtr crtc, CARD32 * image)
     	crtcp->cursor_handle = whandle.handle;
         }
     
    -    transfer = ms->screen->get_tex_transfer(ms->screen, crtcp->cursor_tex,
    -					    0, 0, 0,
    -					    PIPE_TRANSFER_WRITE,
    -					    0, 0, 64, 64);
    -    ptr = ms->screen->transfer_map(ms->screen, transfer);
    +    transfer = ms->ctx->get_tex_transfer(ms->ctx, crtcp->cursor_tex,
    +                                         0, 0, 0,
    +                                         PIPE_TRANSFER_WRITE,
    +                                         0, 0, 64, 64);
    +    ptr = ms->ctx->transfer_map(ms->ctx, transfer);
         util_copy_rect(ptr, crtcp->cursor_tex->format,
     		   transfer->stride, 0, 0,
     		   64, 64, (void*)image, 64 * 4, 0, 0);
    -    ms->screen->transfer_unmap(ms->screen, transfer);
    -    ms->screen->tex_transfer_destroy(transfer);
    +    ms->ctx->transfer_unmap(ms->ctx, transfer);
    +    ms->ctx->tex_transfer_destroy(ms->ctx, transfer);
     }
     
     #if HAVE_LIBKMS
    @@ -276,7 +277,21 @@ err_bo_destroy:
     static void
     crtc_load_cursor_argb(xf86CrtcPtr crtc, CARD32 * image)
     {
    +    xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(crtc->scrn);
         modesettingPtr ms = modesettingPTR(crtc->scrn);
    +
    +    /* Older X servers have cursor reference counting bugs leading to use of
    +     * freed memory and consequently random crashes. Should be fixed as of
    +     * xserver 1.8, but this workaround shouldn't hurt anyway.
    +     */
    +    if (config->cursor)
    +       config->cursor->refcnt++;
    +
    +    if (ms->cursor)
    +       FreeCursor(ms->cursor, None);
    +
    +    ms->cursor = config->cursor;
    +
         if (ms->screen)
     	crtc_load_cursor_argb_ga3d(crtc, image);
     #ifdef HAVE_LIBKMS
    diff --git a/src/gallium/state_trackers/xorg/xorg_driver.c b/src/gallium/state_trackers/xorg/xorg_driver.c
    index 004a28f00e1..8ac5179545a 100644
    --- a/src/gallium/state_trackers/xorg/xorg_driver.c
    +++ b/src/gallium/state_trackers/xorg/xorg_driver.c
    @@ -922,6 +922,11 @@ drv_close_screen(int scrnIndex, ScreenPtr pScreen)
     	drv_leave_vt(scrnIndex, 0);
         }
     
    +    if (ms->cursor) {
    +       FreeCursor(ms->cursor, None);
    +       ms->cursor = NULL;
    +    }
    +
         if (cust && cust->winsys_screen_close)
     	cust->winsys_screen_close(cust);
     
    diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c
    index a7ffe3f4992..bdec0e254fa 100644
    --- a/src/gallium/state_trackers/xorg/xorg_exa.c
    +++ b/src/gallium/state_trackers/xorg/xorg_exa.c
    @@ -188,11 +188,7 @@ ExaDownloadFromScreen(PixmapPtr pPix, int x,  int y, int w,  int h, char *dst,
         if (!priv || !priv->tex)
     	return FALSE;
     
    -    if (exa->pipe->is_texture_referenced(exa->pipe, priv->tex, 0, 0) &
    -	PIPE_REFERENCED_FOR_WRITE)
    -	exa->pipe->flush(exa->pipe, 0, NULL);
    -
    -    transfer = exa->scrn->get_tex_transfer(exa->scrn, priv->tex, 0, 0, 0,
    +    transfer = exa->pipe->get_tex_transfer(exa->pipe, priv->tex, 0, 0, 0,
     					   PIPE_TRANSFER_READ, x, y, w, h);
         if (!transfer)
     	return FALSE;
    @@ -203,11 +199,11 @@ ExaDownloadFromScreen(PixmapPtr pPix, int x,  int y, int w,  int h, char *dst,
     #endif
     
         util_copy_rect((unsigned char*)dst, priv->tex->format, dst_pitch, 0, 0,
    -		   w, h, exa->scrn->transfer_map(exa->scrn, transfer),
    +		   w, h, exa->pipe->transfer_map(exa->pipe, transfer),
     		   transfer->stride, 0, 0);
     
    -    exa->scrn->transfer_unmap(exa->scrn, transfer);
    -    exa->scrn->tex_transfer_destroy(transfer);
    +    exa->pipe->transfer_unmap(exa->pipe, transfer);
    +    exa->pipe->tex_transfer_destroy(exa->pipe, transfer);
     
         return TRUE;
     }
    @@ -226,12 +222,7 @@ ExaUploadToScreen(PixmapPtr pPix, int x, int y, int w, int h, char *src,
         if (!priv || !priv->tex)
     	return FALSE;
     
    -    /* make sure that any pending operations are flushed to hardware */
    -    if (exa->pipe->is_texture_referenced(exa->pipe, priv->tex, 0, 0) &
    -	(PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE))
    -	xorg_exa_flush(exa, 0, NULL);
    -
    -    transfer = exa->scrn->get_tex_transfer(exa->scrn, priv->tex, 0, 0, 0,
    +    transfer = exa->pipe->get_tex_transfer(exa->pipe, priv->tex, 0, 0, 0,
     					   PIPE_TRANSFER_WRITE, x, y, w, h);
         if (!transfer)
     	return FALSE;
    @@ -241,12 +232,12 @@ ExaUploadToScreen(PixmapPtr pPix, int x, int y, int w, int h, char *src,
                      x, y, w, h, src_pitch);
     #endif
     
    -    util_copy_rect(exa->scrn->transfer_map(exa->scrn, transfer),
    +    util_copy_rect(exa->pipe->transfer_map(exa->pipe, transfer),
     		   priv->tex->format, transfer->stride, 0, 0, w, h,
     		   (unsigned char*)src, src_pitch, 0, 0);
     
    -    exa->scrn->transfer_unmap(exa->scrn, transfer);
    -    exa->scrn->tex_transfer_destroy(transfer);
    +    exa->pipe->transfer_unmap(exa->pipe, transfer);
    +    exa->pipe->tex_transfer_destroy(exa->pipe, transfer);
     
         return TRUE;
     }
    @@ -270,15 +261,11 @@ ExaPrepareAccess(PixmapPtr pPix, int index)
     
         if (priv->map_count == 0)
         {
    -	if (exa->pipe->is_texture_referenced(exa->pipe, priv->tex, 0, 0) &
    -	    PIPE_REFERENCED_FOR_WRITE)
    -	    exa->pipe->flush(exa->pipe, 0, NULL);
    -
             assert(pPix->drawable.width <= priv->tex->width0);
             assert(pPix->drawable.height <= priv->tex->height0);
     
     	priv->map_transfer =
    -	    exa->scrn->get_tex_transfer(exa->scrn, priv->tex, 0, 0, 0,
    +	    exa->pipe->get_tex_transfer(exa->pipe, priv->tex, 0, 0, 0,
     #ifdef EXA_MIXED_PIXMAPS
     					PIPE_TRANSFER_MAP_DIRECTLY |
     #endif
    @@ -294,7 +281,7 @@ ExaPrepareAccess(PixmapPtr pPix, int index)
     #endif
     
     	pPix->devPrivate.ptr =
    -	    exa->scrn->transfer_map(exa->scrn, priv->map_transfer);
    +	    exa->pipe->transfer_map(exa->pipe, priv->map_transfer);
     	pPix->devKind = priv->map_transfer->stride;
         }
     
    @@ -321,8 +308,8 @@ ExaFinishAccess(PixmapPtr pPix, int index)
     
         if (--priv->map_count == 0) {
     	assert(priv->map_transfer);
    -	exa->scrn->transfer_unmap(exa->scrn, priv->map_transfer);
    -	exa->scrn->tex_transfer_destroy(priv->map_transfer);
    +	exa->pipe->transfer_unmap(exa->pipe, priv->map_transfer);
    +	exa->pipe->tex_transfer_destroy(exa->pipe, priv->map_transfer);
     	priv->map_transfer = NULL;
     	pPix->devPrivate.ptr = NULL;
         }
    diff --git a/src/gallium/state_trackers/xorg/xorg_tracker.h b/src/gallium/state_trackers/xorg/xorg_tracker.h
    index 2f5cc64d9c5..c1884ebd115 100644
    --- a/src/gallium/state_trackers/xorg/xorg_tracker.h
    +++ b/src/gallium/state_trackers/xorg/xorg_tracker.h
    @@ -90,6 +90,7 @@ typedef struct _modesettingRec
     
         Bool noAccel;
         Bool SWCursor;
    +    CursorPtr cursor;
         CloseScreenProcPtr CloseScreen;
     
         /* Broken-out options. */
    diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c
    index e37a1c39596..5a195cb482d 100644
    --- a/src/gallium/state_trackers/xorg/xorg_xv.c
    +++ b/src/gallium/state_trackers/xorg/xorg_xv.c
    @@ -275,28 +275,28 @@ copy_packed_data(ScrnInfoPtr pScrn,
        int i, j;
        struct pipe_texture **dst = port->yuv[port->current_set];
        struct pipe_transfer *ytrans, *utrans, *vtrans;
    -   struct pipe_screen *screen = port->r->pipe->screen;
    +   struct pipe_context *pipe = port->r->pipe;
        char *ymap, *vmap, *umap;
        unsigned char y1, y2, u, v;
        int yidx, uidx, vidx;
        int y_array_size = w * h;
     
    -   ytrans = screen->get_tex_transfer(screen, dst[0],
    -                                     0, 0, 0,
    -                                     PIPE_TRANSFER_WRITE,
    -                                     left, top, w, h);
    -   utrans = screen->get_tex_transfer(screen, dst[1],
    -                                     0, 0, 0,
    -                                     PIPE_TRANSFER_WRITE,
    -                                     left, top, w, h);
    -   vtrans = screen->get_tex_transfer(screen, dst[2],
    -                                     0, 0, 0,
    -                                     PIPE_TRANSFER_WRITE,
    -                                     left, top, w, h);
    +   ytrans = pipe->get_tex_transfer(pipe, dst[0],
    +                                   0, 0, 0,
    +                                   PIPE_TRANSFER_WRITE,
    +                                   left, top, w, h);
    +   utrans = pipe->get_tex_transfer(pipe, dst[1],
    +                                   0, 0, 0,
    +                                   PIPE_TRANSFER_WRITE,
    +                                   left, top, w, h);
    +   vtrans = pipe->get_tex_transfer(pipe, dst[2],
    +                                   0, 0, 0,
    +                                   PIPE_TRANSFER_WRITE,
    +                                   left, top, w, h);
     
    -   ymap = (char*)screen->transfer_map(screen, ytrans);
    -   umap = (char*)screen->transfer_map(screen, utrans);
    -   vmap = (char*)screen->transfer_map(screen, vtrans);
    +   ymap = (char*)pipe->transfer_map(pipe, ytrans);
    +   umap = (char*)pipe->transfer_map(pipe, utrans);
    +   vmap = (char*)pipe->transfer_map(pipe, vtrans);
     
        yidx = uidx = vidx = 0;
     
    @@ -362,12 +362,12 @@ copy_packed_data(ScrnInfoPtr pScrn,
           break;
        }
     
    -   screen->transfer_unmap(screen, ytrans);
    -   screen->transfer_unmap(screen, utrans);
    -   screen->transfer_unmap(screen, vtrans);
    -   screen->tex_transfer_destroy(ytrans);
    -   screen->tex_transfer_destroy(utrans);
    -   screen->tex_transfer_destroy(vtrans);
    +   pipe->transfer_unmap(pipe, ytrans);
    +   pipe->transfer_unmap(pipe, utrans);
    +   pipe->transfer_unmap(pipe, vtrans);
    +   pipe->tex_transfer_destroy(pipe, ytrans);
    +   pipe->tex_transfer_destroy(pipe, utrans);
    +   pipe->tex_transfer_destroy(pipe, vtrans);
     }
     
     
    diff --git a/src/gallium/winsys/drm/vmware/dri/SConscript b/src/gallium/winsys/drm/vmware/dri/SConscript
    index 84319f91ff1..d26d0cd7483 100644
    --- a/src/gallium/winsys/drm/vmware/dri/SConscript
    +++ b/src/gallium/winsys/drm/vmware/dri/SConscript
    @@ -48,6 +48,7 @@ if env['platform'] == 'linux':
                 svgadrm,
                 svga,
                 mesa,
    +            glsl,
                 gallium,
                 ])
           
    diff --git a/src/gallium/winsys/xlib/xlib_sw_winsys.c b/src/gallium/winsys/xlib/xlib_sw_winsys.c
    index 29c642706a4..cecfa4a53d4 100644
    --- a/src/gallium/winsys/xlib/xlib_sw_winsys.c
    +++ b/src/gallium/winsys/xlib/xlib_sw_winsys.c
    @@ -363,7 +363,7 @@ xm_displaytarget_create(struct sw_winsys *winsys,
                             unsigned alignment,
                             unsigned *stride)
     {
    -   struct xm_displaytarget *xm_dt = CALLOC_STRUCT(xm_displaytarget);
    +   struct xm_displaytarget *xm_dt;
        unsigned nblocksy, size;
     
        xm_dt = CALLOC_STRUCT(xm_displaytarget);
    diff --git a/src/glu/sgi/libnurbs/nurbtess/partitionY.h b/src/glu/sgi/libnurbs/nurbtess/partitionY.h
    index 8dda409de1a..5570c183d79 100644
    --- a/src/glu/sgi/libnurbs/nurbtess/partitionY.h
    +++ b/src/glu/sgi/libnurbs/nurbtess/partitionY.h
    @@ -39,7 +39,7 @@
      *or both at or below v. In addition, at least one of the ajacent verteces is
      *strictly below or above v. 
      * A vertex is a relex vertex if the internals angle is strictly greater than 
    - *180. In other words, if the the signed area is negative:
    + *180. In other words, if the signed area is negative:
      *(x1, y1), (x2, y2), (x3, y3) are the three vertices along a polygon, the 
      *order is such that left hand side is inside the polygon. Then (x2,y2) is
      *reflex if: 
    diff --git a/src/glu/sgi/libtess/normal.c b/src/glu/sgi/libtess/normal.c
    index 0a2494be343..7ab83167bbd 100644
    --- a/src/glu/sgi/libtess/normal.c
    +++ b/src/glu/sgi/libtess/normal.c
    @@ -142,7 +142,7 @@ static void CheckOrientation( GLUtesselator *tess )
       GLUhalfEdge *e;
     
       /* When we compute the normal automatically, we choose the orientation
    -   * so that the the sum of the signed areas of all contours is non-negative.
    +   * so that the sum of the signed areas of all contours is non-negative.
        */
       area = 0;
       for( f = fHead->next; f != fHead; f = f->next ) {
    diff --git a/src/glx/single2.c b/src/glx/single2.c
    index 9ecf589ffff..a1461956b99 100644
    --- a/src/glx/single2.c
    +++ b/src/glx/single2.c
    @@ -44,6 +44,9 @@
     #include 
     #endif /* USE_XCB */
     
    +#if !defined(__GNUC__)
    +#  define __builtin_expect(x, y) (x) /* no builtin: keep the condition, drop the hint; parenthesized so !__builtin_expect(a || b, 0) stays correct */
    +#endif
     
     /* Used for GL_ARB_transpose_matrix */
     static void
    diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c
    index 890ae513397..75c98825b79 100644
    --- a/src/mesa/drivers/dri/common/dri_util.c
    +++ b/src/mesa/drivers/dri/common/dri_util.c
    @@ -698,7 +698,7 @@ setupLoaderExtensions(__DRIscreen *psp,
      * \param drm_version Version of the kernel DRM.
      * \param frame_buffer Data describing the location and layout of the
      *                     framebuffer.
    - * \param pSAREA       Pointer the the SAREA.
    + * \param pSAREA       Pointer to the SAREA.
      * \param fd           Device handle for the DRM.
      * \param extensions   ??
      * \param driver_modes  Returns modes suppoted by the driver
    diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
    index a242580273f..842d4b7aa10 100644
    --- a/src/mesa/drivers/dri/i965/Makefile
    +++ b/src/mesa/drivers/dri/i965/Makefile
    @@ -54,6 +54,7 @@ DRIVER_SOURCES = \
     	brw_gs_emit.c \
     	brw_gs_state.c \
     	brw_misc_state.c \
    +	brw_optimize.c \
     	brw_program.c \
     	brw_queryobj.c \
     	brw_sf.c \
    diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
    index a512896f315..241193c3579 100644
    --- a/src/mesa/drivers/dri/i965/brw_context.c
    +++ b/src/mesa/drivers/dri/i965/brw_context.c
    @@ -156,6 +156,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
           brw->has_surface_tile_offset = GL_TRUE;
           brw->has_compr4 = GL_TRUE;
           brw->has_aa_line_parameters = GL_TRUE;
    +      brw->has_pln = GL_TRUE;
       } else {
           brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_965;
           brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
    diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
    index d6fc37e4d89..2855c93ea66 100644
    --- a/src/mesa/drivers/dri/i965/brw_context.h
    +++ b/src/mesa/drivers/dri/i965/brw_context.h
    @@ -446,6 +446,7 @@ struct brw_context
        GLboolean has_compr4;
        GLboolean has_negative_rhw_bug;
        GLboolean has_aa_line_parameters;
    +   GLboolean has_pln;
     ;
        struct {
           struct brw_state_flags dirty;
    diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
    index bb1b5f5ef03..984e56d00c8 100644
    --- a/src/mesa/drivers/dri/i965/brw_defines.h
    +++ b/src/mesa/drivers/dri/i965/brw_defines.h
    @@ -550,6 +550,7 @@
     #define BRW_OPCODE_DP2        87
     #define BRW_OPCODE_DPA2       88
     #define BRW_OPCODE_LINE       89
    +#define BRW_OPCODE_PLN        90
     #define BRW_OPCODE_NOP        126
     
     #define BRW_PREDICATE_NONE             0
    diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
    index 54699cf8d34..ad61770212c 100644
    --- a/src/mesa/drivers/dri/i965/brw_disasm.c
    +++ b/src/mesa/drivers/dri/i965/brw_disasm.c
    @@ -50,6 +50,7 @@ struct {
         [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
         [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
         [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
    +    [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
         [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
         [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
         [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
    @@ -73,7 +74,7 @@ struct {
         [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
         [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
         [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
    -    [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 },
    +    [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 },
         [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
         [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
         [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
    diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
    index 39eb88d7c2b..4f55158e8f3 100644
    --- a/src/mesa/drivers/dri/i965/brw_eu.h
    +++ b/src/mesa/drivers/dri/i965/brw_eu.h
    @@ -795,6 +795,7 @@ ALU2(DPH)
     ALU2(DP3)
     ALU2(DP2)
     ALU2(LINE)
    +ALU2(PLN)
     
     #undef ALU1
     #undef ALU2
    @@ -965,4 +966,9 @@ void brw_math_invert( struct brw_compile *p,
     
     void brw_set_src1( struct brw_instruction *insn,
                               struct brw_reg reg );
    +
    +
    +/* brw_optimize.c */
    +void brw_optimize(struct brw_compile *p);
    +
     #endif
    diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
    index f69d5296137..d2395dec288 100644
    --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
    +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
    @@ -573,7 +573,7 @@ ALU2(DPH)
     ALU2(DP3)
     ALU2(DP2)
     ALU2(LINE)
    -
    +ALU2(PLN)
     
     
     
    @@ -1290,7 +1290,7 @@ void brw_SAMPLE(struct brw_compile *p,
     		GLuint simd_mode)
     {
        GLboolean need_stall = 0;
    -   
    +
        if (writemask == 0) {
           /*printf("%s: zero writemask??\n", __FUNCTION__); */
           return;
    @@ -1327,8 +1327,14 @@ void brw_SAMPLE(struct brw_compile *p,
              /* printf("need stall %x %x\n", newmask , writemask); */
           }
           else {
    +	 GLboolean dispatch_16 = GL_FALSE;
    +
     	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
    -	 
    +
    +	 guess_execution_size(p->current, dest);
    +	 if (p->current->header.execution_size == BRW_EXECUTE_16)
    +	    dispatch_16 = GL_TRUE;
    +
     	 newmask = ~newmask & WRITEMASK_XYZW;
     
     	 brw_push_insn_state(p);
    @@ -1343,7 +1349,13 @@ void brw_SAMPLE(struct brw_compile *p,
     
       	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); 
     	 dest = offset(dest, dst_offset);
    -	 response_length = len * 2;
    +
    +	 /* For 16-wide dispatch, masked channels are skipped in the
    +	  * response.  For 8-wide, masked channels still take up slots,
    +	  * and are just not written to.
    +	  */
    +	 if (dispatch_16)
    +	    response_length = len * 2;
           }
        }
     
    diff --git a/src/mesa/drivers/dri/i965/brw_optimize.c b/src/mesa/drivers/dri/i965/brw_optimize.c
    new file mode 100644
    index 00000000000..57df9ea1151
    --- /dev/null
    +++ b/src/mesa/drivers/dri/i965/brw_optimize.c
    @@ -0,0 +1,115 @@
    +/*
    + * Copyright © 2010 Intel Corporation
    + *
    + * Permission is hereby granted, free of charge, to any person obtaining a
    + * copy of this software and associated documentation files (the "Software"),
    + * to deal in the Software without restriction, including without limitation
    + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    + * and/or sell copies of the Software, and to permit persons to whom the
    + * Software is furnished to do so, subject to the following conditions:
    + *
    + * The above copyright notice and this permission notice (including the next
    + * paragraph) shall be included in all copies or substantial portions of the
    + * Software.
    + *
    + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
    + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
    + * IN THE SOFTWARE.
    + *
    + * Authors:
    + *    Eric Anholt 
    + *
    + */
    +
    +#include "main/macros.h"
    +#include "shader/program.h"
    +#include "shader/prog_parameter.h"
    +#include "shader/prog_print.h"
    +#include "brw_context.h"
    +#include "brw_defines.h"
    +#include "brw_eu.h"
    +
    +static GLboolean
    +is_single_channel_dp4(struct brw_instruction *insn)
    +{
    +   if (insn->header.opcode != BRW_OPCODE_DP4 ||
    +       insn->header.execution_size != BRW_EXECUTE_8 ||
    +       insn->header.access_mode != BRW_ALIGN_16 ||
    +       insn->bits1.da1.dest_reg_file != BRW_GENERAL_REGISTER_FILE)
    +      return GL_FALSE;
    +
    +   if (!is_power_of_two(insn->bits1.da16.dest_writemask))
    +      return GL_FALSE;
    +
    +   return GL_TRUE;
    +}
    +
    +/**
    + * Sets the dependency control fields on DP4 instructions.
    + *
    + * The hardware only tracks dependencies on a register basis, so when
    + * you do:
    + *
    + * DP4 dst.x src1 src2
    + * DP4 dst.y src1 src3
    + * DP4 dst.z src1 src4
    + * DP4 dst.w src1 src5
    + *
    + * It will wait to do the DP4 dst.y until the dst.x is resolved, etc.
    + * We can examine our instruction stream and set the dependency
    + * control fields to tell the hardware when to do it.
    + *
    + * We may want to extend this to other instructions that are used to
    + * fill in the destination register one channel at a time.
    + */
    +static void
    +brw_set_dp4_dependency_control(struct brw_compile *p)
    +{
    +   int i;
    +
    +   for (i = 1; i < p->nr_insn; i++) {
    +      struct brw_instruction *insn = &p->store[i];
    +      struct brw_instruction *prev = &p->store[i - 1];
    +
    +      if (!is_single_channel_dp4(prev))
    +	 continue;
    +
    +      if (!is_single_channel_dp4(insn)) {
    +	 i++;
    +	 continue;
    +      }
    +
    +      /* Only avoid hw dep control if the write masks are different
    +       * channels of one reg.
    +       */
    +      if (insn->bits1.da16.dest_writemask == prev->bits1.da16.dest_writemask)
    +	 continue;
    +      if (insn->bits1.da16.dest_reg_nr != prev->bits1.da16.dest_reg_nr)
    +	 continue;
    +
    +      /* Check if the second instruction depends on the previous one
    +       * for a src.
    +       */
    +      if (insn->bits1.da1.src0_reg_file == BRW_GENERAL_REGISTER_FILE &&
    +	  (insn->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT ||
    +	   insn->bits2.da1.src0_reg_nr == insn->bits1.da16.dest_reg_nr))
    +	  continue;
    +      if (insn->bits1.da1.src1_reg_file == BRW_GENERAL_REGISTER_FILE &&
    +	  (insn->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT ||
    +	   insn->bits3.da1.src1_reg_nr == insn->bits1.da16.dest_reg_nr))
    +	  continue;
    +
    +      prev->header.dependency_control |= BRW_DEPENDENCY_NOTCLEARED;
    +      insn->header.dependency_control |= BRW_DEPENDENCY_NOTCHECKED;
    +   }
    +}
    +
    +void
    +brw_optimize(struct brw_compile *p)
    +{
    +   brw_set_dp4_dependency_control(p);
    +}
    diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
    index a48804a660f..d16e916832e 100644
    --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
    +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
    @@ -384,9 +384,8 @@ static void emit_sop( struct brw_vs_compile *c,
     {
        struct brw_compile *p = &c->func;
     
    -   brw_MOV(p, dst, brw_imm_f(0.0f));
    -   brw_CMP(p, brw_null_reg(), cond, arg0, arg1);
    -   brw_MOV(p, dst, brw_imm_f(1.0f));
    +   brw_CMP(p, brw_null_reg(), cond, arg1, arg0);
    +   brw_SEL(p, dst, brw_null_reg(), brw_imm_f(1.0f));
        brw_set_predicate_control_flag_value(p, 0xff);
     }
     
    @@ -1825,6 +1824,8 @@ void brw_vs_emit(struct brw_vs_compile *c )
     
        post_vs_emit(c, end_inst, last_inst);
     
    +   brw_optimize(p);
    +
        if (INTEL_DEBUG & DEBUG_VS) {
           int i;
     
    diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
    index 88d84ee82fe..47b764d24d1 100644
    --- a/src/mesa/drivers/dri/i965/brw_wm.h
    +++ b/src/mesa/drivers/dri/i965/brw_wm.h
    @@ -328,6 +328,12 @@ void emit_cinterp(struct brw_compile *p,
     		  const struct brw_reg *dst,
     		  GLuint mask,
     		  const struct brw_reg *arg0);
    +void emit_cmp(struct brw_compile *p,
    +	      const struct brw_reg *dst,
    +	      GLuint mask,
    +	      const struct brw_reg *arg0,
    +	      const struct brw_reg *arg1,
    +	      const struct brw_reg *arg2);
     void emit_ddxy(struct brw_compile *p,
     	       const struct brw_reg *dst,
     	       GLuint mask,
    diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
    index 9315bca3156..05e464d4b61 100644
    --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
    +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
    @@ -34,6 +34,23 @@
     #include "brw_context.h"
     #include "brw_wm.h"
     
    +static GLboolean can_do_pln(struct intel_context *intel,
    +			    const struct brw_reg *deltas)
    +{
    +   struct brw_context *brw = brw_context(&intel->ctx);
    +
    +   if (!brw->has_pln)
    +      return GL_FALSE;
    +
    +   if (deltas[1].nr != deltas[0].nr + 1)
    +      return GL_FALSE;
    +
    +   if (intel->gen < 6 && ((deltas[0].nr & 1) != 0))
    +      return GL_FALSE;
    +
    +   return GL_TRUE;
    +}
    +
     /* Not quite sure how correct this is - need to understand horiz
      * vs. vertical strides a little better.
      */
    @@ -45,7 +62,13 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg )
     }
     
     
    -/* Payload R0:
    +/**
    + * Computes the screen-space x,y position of the pixels.
    + *
    + * This will be used by emit_delta_xy() or emit_wpos_xy() for
    + * interpolation of attributes.
    + *
    + * Payload R0:
      *
      * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
      *         corresponding to each of the 16 execution channels.
    @@ -60,7 +83,6 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg )
      * R1.7 -- ?
      * R1.8 -- ?
      */
    -
     void emit_pixel_xy(struct brw_wm_compile *c,
     		   const struct brw_reg *dst,
     		   GLuint mask)
    @@ -100,7 +122,14 @@ void emit_pixel_xy(struct brw_wm_compile *c,
        brw_pop_insn_state(p);
     }
     
    -
    +/**
    + * Computes the screen-space x,y distance of the pixels from the start
    + * vertex.
    + *
    + * This will be used in linterp or pinterp with the start vertex value
    + * and the Cx, Cy, and C0 coefficients passed in from the setup engine
    + * to produce interpolated attribute values.
    + */
     void emit_delta_xy(struct brw_compile *p,
     		   const struct brw_reg *dst,
     		   GLuint mask,
    @@ -108,25 +137,27 @@ void emit_delta_xy(struct brw_compile *p,
     {
        struct brw_reg r1 = brw_vec1_grf(1, 0);
     
    +   if (mask == 0)
    +      return;
    +
    +   assert(mask == WRITEMASK_XY);
    +
        /* Calc delta X,Y by subtracting origin in r1 from the pixel
    -    * centers.
    +    * centers produced by emit_pixel_xy().
         */
    -   if (mask & WRITEMASK_X) {
    -      brw_ADD(p,
    -	      dst[0],
    -	      retype(arg0[0], BRW_REGISTER_TYPE_UW),
    -	      negate(r1));
    -   }
    -
    -   if (mask & WRITEMASK_Y) {
    -      brw_ADD(p,
    -	      dst[1],
    -	      retype(arg0[1], BRW_REGISTER_TYPE_UW),
    -	      negate(suboffset(r1,1)));
    -
    -   }
    +   brw_ADD(p,
    +	   dst[0],
    +	   retype(arg0[0], BRW_REGISTER_TYPE_UW),
    +	   negate(r1));
    +   brw_ADD(p,
    +	   dst[1],
    +	   retype(arg0[1], BRW_REGISTER_TYPE_UW),
    +	   negate(suboffset(r1,1)));
     }
     
    +/**
    + * Computes the pixel offset from the window origin for gl_FragCoord.
    + */
     void emit_wpos_xy(struct brw_wm_compile *c,
     		  const struct brw_reg *dst,
     		  GLuint mask,
    @@ -134,9 +165,6 @@ void emit_wpos_xy(struct brw_wm_compile *c,
     {
        struct brw_compile *p = &c->func;
     
    -   /* Calculate the pixel offset from window bottom left into destination
    -    * X and Y channels.
    -    */
        if (mask & WRITEMASK_X) {
           if (c->fp->program.PixelCenterInteger) {
     	 /* X' = X */
    @@ -186,6 +214,7 @@ void emit_pixel_w(struct brw_wm_compile *c,
     		  const struct brw_reg *deltas)
     {
        struct brw_compile *p = &c->func;
    +   struct intel_context *intel = &p->brw->intel;
     
        /* Don't need this if all you are doing is interpolating color, for
         * instance.
    @@ -196,8 +225,12 @@ void emit_pixel_w(struct brw_wm_compile *c,
           /* Calc 1/w - just linterp wpos[3] optimized by putting the
            * result straight into a message reg.
            */
    -      brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
    -      brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
    +      if (can_do_pln(intel, deltas)) {
    +	 brw_PLN(p, brw_message_reg(2), interp3, deltas[0]);
    +      } else {
    +	 brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
    +	 brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
    +      }
     
           /* Calc w */
           if (c->dispatch_width == 16) {
    @@ -224,6 +257,7 @@ void emit_linterp(struct brw_compile *p,
     		  const struct brw_reg *arg0,
     		  const struct brw_reg *deltas)
     {
    +   struct intel_context *intel = &p->brw->intel;
        struct brw_reg interp[4];
        GLuint nr = arg0[0].nr;
        GLuint i;
    @@ -235,8 +269,12 @@ void emit_linterp(struct brw_compile *p,
     
        for (i = 0; i < 4; i++) {
           if (mask & (1<brw->intel;
        struct brw_reg interp[4];
        GLuint nr = arg0[0].nr;
        GLuint i;
    @@ -260,8 +299,12 @@ void emit_pinterp(struct brw_compile *p,
     
        for (i = 0; i < 4; i++) {
           if (mask & (1<func;
        struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
    -   GLuint i;
    -   
    -   /* XXX - usually won't need 4 compares!
    -    */
    +   GLuint i, j;
    +
        for (i = 0; i < 4; i++) {
    +      /* Check if we've already done the comparison for this reg
    +       * -- common when someone does KIL TEMP.wwww.
    +       */
    +      for (j = 0; j < i; j++) {
    +	 if (memcmp(&arg0[j], &arg0[i], sizeof(arg0[0])) == 0)
    +	    break;
    +      }
    +      if (j != i)
    +	 continue;
    +
           brw_push_insn_state(p);
           brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));   
           brw_set_predicate_control_flag_value(p, 0xff);
    diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
    index ea3c2405af9..0b66cc6c9f3 100644
    --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
    +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
    @@ -289,6 +289,7 @@ reclaim_temps(struct brw_wm_compile *c)
      */
     static void prealloc_reg(struct brw_wm_compile *c)
     {
    +    struct intel_context *intel = &c->func.brw->intel;
         int i, j;
         struct brw_reg reg;
         int urb_read_length = 0;
    @@ -413,6 +414,43 @@ static void prealloc_reg(struct brw_wm_compile *c)
     	}
         }
     
    +    for (i = 0; i < c->nr_fp_insns; i++) {
    +	const struct prog_instruction *inst = &c->prog_instructions[i];
    +
    +	switch (inst->Opcode) {
    +	case WM_DELTAXY:
    +	    /* Allocate WM_DELTAXY destination on G45/GM45/gen5 to an
    +	     * even-numbered GRF if possible so that we can use the PLN
    +	     * instruction.
    +	     */
    +	    if (inst->DstReg.WriteMask == WRITEMASK_XY &&
    +		!c->wm_regs[inst->DstReg.File][inst->DstReg.Index][0].inited &&
    +		!c->wm_regs[inst->DstReg.File][inst->DstReg.Index][1].inited &&
    +		(IS_G4X(intel->intelScreen->deviceID) || intel->gen == 5)) {
    +		int grf;
    +
    +		for (grf = c->first_free_grf & ~1;
    +		     grf < BRW_WM_MAX_GRF;
    +		     grf += 2)
    +		{
    +		    if (!c->used_grf[grf] && !c->used_grf[grf + 1]) {
    +			c->used_grf[grf] = GL_TRUE;
    +			c->used_grf[grf + 1] = GL_TRUE;
    +			c->first_free_grf = grf + 2;  /* a guess */
    +
    +			set_reg(c, inst->DstReg.File, inst->DstReg.Index, 0,
    +				brw_vec8_grf(grf, 0));
    +			set_reg(c, inst->DstReg.File, inst->DstReg.Index, 1,
    +				brw_vec8_grf(grf + 1, 0));
    +			break;
    +		    }
    +		}
    +	    }
    +	default:
    +	    break;
    +	}
    +    }
    +
         /* An instruction may reference up to three constants.
          * They'll be found in these registers.
          * XXX alloc these on demand!
    @@ -1869,6 +1907,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
     	    case OPCODE_LG2:
     		emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
     		break;
    +	    case OPCODE_CMP:
    +		emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
    +		break;
     	    case OPCODE_MIN:	
     		emit_min(p, dst, dst_flags, args[0], args[1]);
     		break;
    @@ -2026,8 +2067,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
                    }
                    break;
     	    default:
    -		printf("unsupported IR in fragment shader %d\n",
    -			inst->Opcode);
    +		printf("unsupported opcode %d (%s) in fragment shader\n",
    +		       inst->Opcode, inst->Opcode < MAX_OPCODE ?
    +		       _mesa_opcode_string(inst->Opcode) : "unknown");
     	}
     
     	/* Release temporaries containing any unaliased source regs. */
    diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.c b/src/mesa/drivers/dri/nouveau/nouveau_context.c
    index 502e01255cb..be57d48b8dd 100644
    --- a/src/mesa/drivers/dri/nouveau/nouveau_context.c
    +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.c
    @@ -69,8 +69,8 @@ nouveau_channel_flush_notify(struct nouveau_channel *chan)
     	struct nouveau_context *nctx = chan->user_private;
     	GLcontext *ctx = &nctx->base;
     
    -	if (nctx->fallback < SWRAST && ctx->DrawBuffer)
    -		nouveau_state_emit(&nctx->base);
    +	if (nctx->fallback < SWRAST)
    +		nouveau_bo_state_emit(ctx);
     }
     
     GLboolean
    diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.h b/src/mesa/drivers/dri/nouveau/nouveau_context.h
    index 682f8a414e3..fe64fec930b 100644
    --- a/src/mesa/drivers/dri/nouveau/nouveau_context.h
    +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.h
    @@ -85,6 +85,8 @@ struct nouveau_context {
     	BITSET_SET(to_nouveau_context(ctx)->dirty, NOUVEAU_STATE_##s)
     #define context_dirty_i(ctx, s, i) \
     	BITSET_SET(to_nouveau_context(ctx)->dirty, NOUVEAU_STATE_##s##0 + i)
    +#define context_emit(ctx, s) \
    +	context_drv(ctx)->emit[NOUVEAU_STATE_##s](ctx, NOUVEAU_STATE_##s)
     
     GLboolean
     nouveau_context_create(const __GLcontextModes *visual, __DRIcontext *dri_ctx,
    diff --git a/src/mesa/drivers/dri/nouveau/nv04_context.c b/src/mesa/drivers/dri/nouveau/nv04_context.c
    index a442425e448..3624b3af921 100644
    --- a/src/mesa/drivers/dri/nouveau/nv04_context.c
    +++ b/src/mesa/drivers/dri/nouveau/nv04_context.c
    @@ -75,18 +75,16 @@ nv04_channel_flush_notify(struct nouveau_channel *chan)
     	struct nouveau_context *nctx = chan->user_private;
     	GLcontext *ctx = &nctx->base;
     
    -	if (nctx->fallback < SWRAST && ctx->DrawBuffer) {
    -		GLcontext *ctx = &nctx->base;
    -
    +	if (nctx->fallback < SWRAST) {
     		/* Flushing seems to clobber the engine context. */
    -		context_dirty_i(ctx, TEX_OBJ, 0);
    -		context_dirty_i(ctx, TEX_OBJ, 1);
    -		context_dirty_i(ctx, TEX_ENV, 0);
    -		context_dirty_i(ctx, TEX_ENV, 1);
    -		context_dirty(ctx, CONTROL);
    -		context_dirty(ctx, BLEND);
    +		context_emit(ctx, TEX_OBJ0);
    +		context_emit(ctx, TEX_OBJ1);
    +		context_emit(ctx, TEX_ENV0);
    +		context_emit(ctx, TEX_ENV1);
    +		context_emit(ctx, CONTROL);
    +		context_emit(ctx, BLEND);
     
    -		nouveau_state_emit(ctx);
    +		nouveau_bo_state_emit(ctx);
     	}
     }
     
    @@ -200,9 +198,9 @@ nv04_context_create(struct nouveau_screen *screen, const GLvisual *visual,
     	if (ret)
     		goto fail;
     
    +	init_dummy_texture(ctx);
     	nv04_hwctx_init(ctx);
     	nv04_render_init(ctx);
    -	init_dummy_texture(ctx);
     
     	return ctx;
     
    diff --git a/src/mesa/drivers/dri/r128/r128_tex.c b/src/mesa/drivers/dri/r128/r128_tex.c
    index 24fbf8f5194..4ec4be9a47b 100644
    --- a/src/mesa/drivers/dri/r128/r128_tex.c
    +++ b/src/mesa/drivers/dri/r128/r128_tex.c
    @@ -468,7 +468,7 @@ static void r128TexEnv( GLcontext *ctx, GLenum target,
     	  * certain point.  It is better than completely ignoring the LOD
     	  * bias.  Unfortunately there isn't much range in the bias, the
     	  * spec mentions strides that vary between 0.5 and 2.0 but these
    -	  * numbers don't seem to relate the the GL LOD bias value at all.
    +	  * numbers don't seem to relate to the GL LOD bias value at all.
     	  */
     	 if ( param[0] >= 1.0 ) {
     	    bias = -128;
    diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile
    index 9ea81fd5059..c9c1346c3a2 100644
    --- a/src/mesa/drivers/dri/r200/Makefile
    +++ b/src/mesa/drivers/dri/r200/Makefile
    @@ -34,7 +34,6 @@ DRIVER_SOURCES = r200_context.c \
     		 r200_state.c \
     		 r200_state_init.c \
     		 r200_cmdbuf.c \
    -		 r200_pixel.c \
     		 r200_tex.c \
     		 r200_texstate.c \
     		 r200_tcl.c \
    diff --git a/src/mesa/drivers/dri/r200/r200_blit.c b/src/mesa/drivers/dri/r200/r200_blit.c
    index 30757600934..e187fc0f61e 100644
    --- a/src/mesa/drivers/dri/r200/r200_blit.c
    +++ b/src/mesa/drivers/dri/r200/r200_blit.c
    @@ -48,6 +48,11 @@ unsigned r200_check_blit(gl_format mesa_format)
         case MESA_FORMAT_ARGB4444:
         case MESA_FORMAT_ARGB1555:
         case MESA_FORMAT_A8:
    +    case MESA_FORMAT_L8:
    +    case MESA_FORMAT_I8:
    +    /* swizzled */
    +    case MESA_FORMAT_RGBA8888:
    +    case MESA_FORMAT_RGBA8888_REV:
     	    break;
         default:
     	    return 0;
    @@ -86,7 +91,8 @@ static inline void emit_vtx_state(struct r200_context *r200)
     }
     
     static void inline emit_tx_setup(struct r200_context *r200,
    -				 gl_format mesa_format,
    +				 gl_format src_mesa_format,
    +				 gl_format dst_mesa_format,
     				 struct radeon_bo *bo,
     				 intptr_t offset,
     				 unsigned width,
    @@ -101,10 +107,16 @@ static void inline emit_tx_setup(struct r200_context *r200,
         assert(offset % 32 == 0);
     
         /* XXX others?  BE/LE? */
    -    switch (mesa_format) {
    +    switch (src_mesa_format) {
         case MESA_FORMAT_ARGB8888:
     	    txformat |= R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP;
     	    break;
    +    case MESA_FORMAT_RGBA8888:
    +	    txformat |= R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP;
    +	    break;
    +    case MESA_FORMAT_RGBA8888_REV:
    +	    txformat |= R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP;
    +	    break;
         case MESA_FORMAT_XRGB8888:
     	    txformat |= R200_TXFORMAT_ARGB8888;
     	    break;
    @@ -118,26 +130,143 @@ static void inline emit_tx_setup(struct r200_context *r200,
     	    txformat |= R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP;
     	    break;
         case MESA_FORMAT_A8:
    +    case MESA_FORMAT_I8:
     	    txformat |= R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP;
     	    break;
    +    case MESA_FORMAT_L8:
    +	    txformat |= R200_TXFORMAT_I8;
    +	    break;
    +    case MESA_FORMAT_AL88:
    +	    txformat |= R200_TXFORMAT_AI88 | R200_TXFORMAT_ALPHA_IN_MAP;
    +	    break;
         default:
     	    break;
         }
     
    -    BEGIN_BATCH(28);
    -    OUT_BATCH_REGVAL(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
    +    switch (dst_mesa_format) {
    +    case MESA_FORMAT_ARGB8888:
    +    case MESA_FORMAT_XRGB8888:
    +    case MESA_FORMAT_RGB565:
    +    case MESA_FORMAT_ARGB4444:
    +    case MESA_FORMAT_ARGB1555:
    +    case MESA_FORMAT_A8:
    +    case MESA_FORMAT_L8:
    +    case MESA_FORMAT_I8:
    +    default:
    +	    /* no swizzle required */
    +	    BEGIN_BATCH(10);
    +	    OUT_BATCH_REGVAL(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE |
    +					      RADEON_TEX_BLEND_0_ENABLE));
    +	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND_0, (R200_TXC_ARG_A_ZERO |
    +						  R200_TXC_ARG_B_ZERO |
    +						  R200_TXC_ARG_C_R0_COLOR |
    +						  R200_TXC_OP_MADD));
    +	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_0, (R200_TXC_CLAMP_0_1 |
    +						   R200_TXC_OUTPUT_REG_R0));
    +	    OUT_BATCH_REGVAL(R200_PP_TXABLEND_0, (R200_TXA_ARG_A_ZERO |
    +						  R200_TXA_ARG_B_ZERO |
    +						  R200_TXA_ARG_C_R0_ALPHA |
    +						  R200_TXA_OP_MADD));
    +	    OUT_BATCH_REGVAL(R200_PP_TXABLEND2_0, (R200_TXA_CLAMP_0_1 |
    +						   R200_TXA_OUTPUT_REG_R0));
    +	    END_BATCH();
    +	    break;
    +    case MESA_FORMAT_RGBA8888:
    +	    BEGIN_BATCH(10);
    +	    OUT_BATCH_REGVAL(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE |
    +					      RADEON_TEX_BLEND_0_ENABLE));
    +	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND_0, (R200_TXC_ARG_A_ZERO |
    +						  R200_TXC_ARG_B_ZERO |
    +						  R200_TXC_ARG_C_R0_COLOR |
    +						  R200_TXC_OP_MADD));
    +	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_0, (R200_TXC_CLAMP_0_1 |
    +						   R200_TXC_OUTPUT_ROTATE_GBA |
    +						   R200_TXC_OUTPUT_REG_R0));
    +	    OUT_BATCH_REGVAL(R200_PP_TXABLEND_0, (R200_TXA_ARG_A_ZERO |
    +						  R200_TXA_ARG_B_ZERO |
    +						  R200_TXA_ARG_C_R0_ALPHA |
    +						  R200_TXA_OP_MADD));
    +	    OUT_BATCH_REGVAL(R200_PP_TXABLEND2_0, (R200_TXA_CLAMP_0_1 |
    +						   (R200_TXA_REPL_RED << R200_TXA_REPL_ARG_C_SHIFT) |
    +						   R200_TXA_OUTPUT_REG_R0));
    +	    END_BATCH();
    +	    break;
    +    case MESA_FORMAT_RGBA8888_REV:
    +	    BEGIN_BATCH(34);
    +	    OUT_BATCH_REGVAL(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE |
    +					      RADEON_TEX_BLEND_0_ENABLE |
    +					      RADEON_TEX_BLEND_1_ENABLE |
    +					      RADEON_TEX_BLEND_2_ENABLE |
    +					      RADEON_TEX_BLEND_3_ENABLE));
    +	    /* r1.r = r0.b */
    +	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND_0, (R200_TXC_ARG_A_ZERO |
    +						  R200_TXC_ARG_B_ZERO |
    +						  R200_TXC_ARG_C_R0_COLOR |
    +						  R200_TXC_OP_MADD));
    +	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_0, (R200_TXC_CLAMP_0_1 |
    +						   R200_TXC_OUTPUT_MASK_R |
    +						   (R200_TXC_REPL_BLUE << R200_TXC_REPL_ARG_C_SHIFT) |
    +						   R200_TXC_OUTPUT_REG_R1));
    +	    /* r1.a = r0.a */
    +	    OUT_BATCH_REGVAL(R200_PP_TXABLEND_0, (R200_TXA_ARG_A_ZERO |
    +						  R200_TXA_ARG_B_ZERO |
    +						  R200_TXA_ARG_C_R0_ALPHA |
    +						  R200_TXA_OP_MADD));
    +	    OUT_BATCH_REGVAL(R200_PP_TXABLEND2_0, (R200_TXA_CLAMP_0_1 |
    +						   R200_TXA_OUTPUT_REG_R1));
    +	    /* r1.g = r0.g */
    +	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND_1, (R200_TXC_ARG_A_ZERO |
    +						  R200_TXC_ARG_B_ZERO |
    +						  R200_TXC_ARG_C_R0_COLOR |
    +						  R200_TXC_OP_MADD));
    +	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_1, (R200_TXC_CLAMP_0_1 |
    +						   R200_TXC_OUTPUT_MASK_G |
    +						   (R200_TXC_REPL_GREEN << R200_TXC_REPL_ARG_C_SHIFT) |
    +						   R200_TXC_OUTPUT_REG_R1));
    +	    /* r1.a = r0.a */
    +	    OUT_BATCH_REGVAL(R200_PP_TXABLEND_1, (R200_TXA_ARG_A_ZERO |
    +						  R200_TXA_ARG_B_ZERO |
    +						  R200_TXA_ARG_C_R0_ALPHA |
    +						  R200_TXA_OP_MADD));
    +	    OUT_BATCH_REGVAL(R200_PP_TXABLEND2_1, (R200_TXA_CLAMP_0_1 |
    +						   R200_TXA_OUTPUT_REG_R1));
    +	    /* r1.b = r0.r */
    +	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND_2, (R200_TXC_ARG_A_ZERO |
    +						  R200_TXC_ARG_B_ZERO |
    +						  R200_TXC_ARG_C_R0_COLOR |
    +						  R200_TXC_OP_MADD));
    +	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_2, (R200_TXC_CLAMP_0_1 |
    +						   R200_TXC_OUTPUT_MASK_B |
    +						   (R200_TXC_REPL_RED << R200_TXC_REPL_ARG_C_SHIFT) |
    +						   R200_TXC_OUTPUT_REG_R1));
    +	    /* r1.a = r0.a */
    +	    OUT_BATCH_REGVAL(R200_PP_TXABLEND_2, (R200_TXA_ARG_A_ZERO |
    +						  R200_TXA_ARG_B_ZERO |
    +						  R200_TXA_ARG_C_R0_ALPHA |
    +						  R200_TXA_OP_MADD));
    +	    OUT_BATCH_REGVAL(R200_PP_TXABLEND2_2, (R200_TXA_CLAMP_0_1 |
    +						   R200_TXA_OUTPUT_REG_R1));
    +	    /* r0.rgb = r1.rgb */
    +	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND_3, (R200_TXC_ARG_A_ZERO |
    +						  R200_TXC_ARG_B_ZERO |
    +						  R200_TXC_ARG_C_R1_COLOR |
    +						  R200_TXC_OP_MADD));
    +	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_3, (R200_TXC_CLAMP_0_1 |
    +						   R200_TXC_OUTPUT_REG_R0));
    +	    /* r0.a = r1.a */
    +	    OUT_BATCH_REGVAL(R200_PP_TXABLEND_3, (R200_TXA_ARG_A_ZERO |
    +						  R200_TXA_ARG_B_ZERO |
    +						  R200_TXA_ARG_C_R1_ALPHA |
    +						  R200_TXA_OP_MADD));
    +	    OUT_BATCH_REGVAL(R200_PP_TXABLEND2_3, (R200_TXA_CLAMP_0_1 |
    +						   R200_TXA_OUTPUT_REG_R0));
    +	    END_BATCH();
    +	    break;
    +    }
    +
    +    BEGIN_BATCH(18);
         OUT_BATCH_REGVAL(R200_PP_CNTL_X, 0);
         OUT_BATCH_REGVAL(R200_PP_TXMULTI_CTL_0, 0);
    -    OUT_BATCH_REGVAL(R200_PP_TXCBLEND_0, (R200_TXC_ARG_A_ZERO |
    -					  R200_TXC_ARG_B_ZERO |
    -					  R200_TXC_ARG_C_R0_COLOR |
    -					  R200_TXC_OP_MADD));
    -    OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_0, R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
    -    OUT_BATCH_REGVAL(R200_PP_TXABLEND_0, (R200_TXA_ARG_A_ZERO |
    -					  R200_TXA_ARG_B_ZERO |
    -					  R200_TXA_ARG_C_R0_ALPHA |
    -					  R200_TXA_OP_MADD));
    -    OUT_BATCH_REGVAL(R200_PP_TXABLEND2_0, R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
         OUT_BATCH_REGVAL(R200_PP_TXFILTER_0, (R200_CLAMP_S_CLAMP_LAST |
     					  R200_CLAMP_T_CLAMP_LAST |
     					  R200_MAG_FILTER_NEAREST |
    @@ -146,7 +275,7 @@ static void inline emit_tx_setup(struct r200_context *r200,
         OUT_BATCH_REGVAL(R200_PP_TXFORMAT_X_0, 0);
         OUT_BATCH_REGVAL(R200_PP_TXSIZE_0, ((width - 1) |
     					((height - 1) << RADEON_TEX_VSIZE_SHIFT)));
    -    OUT_BATCH_REGVAL(R200_PP_TXPITCH_0, pitch * _mesa_get_format_bytes(mesa_format) - 32);
    +    OUT_BATCH_REGVAL(R200_PP_TXPITCH_0, pitch * _mesa_get_format_bytes(src_mesa_format) - 32);
     
         OUT_BATCH_REGSEQ(R200_PP_TXOFFSET_0, 1);
         OUT_BATCH_RELOC(0, bo, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
    @@ -170,6 +299,8 @@ static inline void emit_cb_setup(struct r200_context *r200,
         switch (mesa_format) {
         case MESA_FORMAT_ARGB8888:
         case MESA_FORMAT_XRGB8888:
    +    case MESA_FORMAT_RGBA8888:
    +    case MESA_FORMAT_RGBA8888_REV:
     	    dst_format = RADEON_COLOR_FORMAT_ARGB8888;
     	    break;
         case MESA_FORMAT_RGB565:
    @@ -182,6 +313,8 @@ static inline void emit_cb_setup(struct r200_context *r200,
     	    dst_format = RADEON_COLOR_FORMAT_ARGB1555;
     	    break;
         case MESA_FORMAT_A8:
    +    case MESA_FORMAT_L8:
    +    case MESA_FORMAT_I8:
     	    dst_format = RADEON_COLOR_FORMAT_RGB8;
     	    break;
         default:
    @@ -384,15 +517,15 @@ unsigned r200_blit(GLcontext *ctx,
         /* Flush is needed to make sure that source buffer has correct data */
         radeonFlush(r200->radeon.glCtx);
     
    -    rcommonEnsureCmdBufSpace(&r200->radeon, 78, __FUNCTION__);
    +    rcommonEnsureCmdBufSpace(&r200->radeon, 102, __FUNCTION__);
     
         if (!validate_buffers(r200, src_bo, dst_bo))
             return GL_FALSE;
     
         /* 14 */
         emit_vtx_state(r200);
    -    /* 28 */
    -    emit_tx_setup(r200, src_mesaformat, src_bo, src_offset, src_width, src_height, src_pitch);
    +    /* 52 max: BEGIN_BATCH(34) for RGBA8888_REV dst + BEGIN_BATCH(18) */
    +    emit_tx_setup(r200, src_mesaformat, dst_mesaformat, src_bo, src_offset, src_width, src_height, src_pitch);
         /* 22 */
         emit_cb_setup(r200, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height);
         /* 14 */
    diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c
    index 4f1a56658cc..36a29350ccc 100644
    --- a/src/mesa/drivers/dri/r200/r200_context.c
    +++ b/src/mesa/drivers/dri/r200/r200_context.c
    @@ -51,7 +51,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     #include "r200_context.h"
     #include "r200_ioctl.h"
     #include "r200_state.h"
    -#include "r200_pixel.h"
     #include "r200_tex.h"
     #include "r200_swtcl.h"
     #include "r200_tcl.h"
    @@ -325,7 +324,7 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
        _mesa_init_driver_functions(&functions);
        r200InitDriverFuncs(&functions);
        r200InitIoctlFuncs(&functions);
    -   r200InitStateFuncs(&functions);
    +   r200InitStateFuncs(&rmesa->radeon, &functions);
        r200InitTextureFuncs(&rmesa->radeon, &functions);
        r200InitShaderFuncs(&functions);
        radeonInitQueryObjFunctions(&functions);
    @@ -474,7 +473,6 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
        /* XXX these should really go right after _mesa_init_driver_functions() */
        radeon_fbo_init(&rmesa->radeon);
        radeonInitSpanFuncs( ctx );
    -   r200InitPixelFuncs( ctx );
        r200InitTnlFuncs( ctx );
        r200InitState( rmesa );
        r200InitSwtcl( ctx );
    diff --git a/src/mesa/drivers/dri/r200/r200_pixel.c b/src/mesa/drivers/dri/r200/r200_pixel.c
    deleted file mode 100644
    index bfb7e2a2ed3..00000000000
    --- a/src/mesa/drivers/dri/r200/r200_pixel.c
    +++ /dev/null
    @@ -1,494 +0,0 @@
    -/*
    -Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
    -
    -The Weather Channel (TM) funded Tungsten Graphics to develop the
    -initial release of the Radeon 8500 driver under the XFree86 license.
    -This notice must be preserved.
    -
    -Permission is hereby granted, free of charge, to any person obtaining
    -a copy of this software and associated documentation files (the
    -"Software"), to deal in the Software without restriction, including
    -without limitation the rights to use, copy, modify, merge, publish,
    -distribute, sublicense, and/or sell copies of the Software, and to
    -permit persons to whom the Software is furnished to do so, subject to
    -the following conditions:
    -
    -The above copyright notice and this permission notice (including the
    -next paragraph) shall be included in all copies or substantial
    -portions of the Software.
    -
    -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
    -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
    -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
    -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    -*/
    -
    -/*
    - * Authors:
    - *   Keith Whitwell 
    - */
    -
    -#include "main/glheader.h"
    -#include "main/enums.h"
    -#include "main/mtypes.h"
    -#include "main/macros.h"
    -#include "swrast/swrast.h"
    -
    -#include "r200_context.h"
    -#include "r200_ioctl.h"
    -#include "r200_pixel.h"
    -#include "r200_swtcl.h"
    -
    -#include "drirenderbuffer.h"
    -
    -
    -static GLboolean
    -check_color( const GLcontext *ctx, GLenum type, GLenum format,
    -	     const struct gl_pixelstore_attrib *packing,
    -	     const void *pixels, GLint sz, GLint pitch )
    -{
    -   r200ContextPtr rmesa = R200_CONTEXT(ctx);
    -   GLuint cpp = rmesa->radeon.radeonScreen->cpp;
    -
    -   if (R200_DEBUG & RADEON_PIXEL)
    -      fprintf(stderr, "%s\n", __FUNCTION__);
    -
    -   if (	(pitch & 63) ||
    -	ctx->_ImageTransferState ||
    -	packing->SwapBytes ||
    -	packing->LsbFirst) {
    -      if (R200_DEBUG & RADEON_PIXEL)
    -	 fprintf(stderr, "%s: failed 1\n", __FUNCTION__);
    -      return GL_FALSE;
    -   }
    -
    -   if ( type == GL_UNSIGNED_INT_8_8_8_8_REV &&
    -	cpp == 4 &&
    -	format == GL_BGRA ) {
    -      if (R200_DEBUG & RADEON_PIXEL)
    -	 fprintf(stderr, "%s: passed 2\n", __FUNCTION__);
    -      return GL_TRUE;
    -   }
    -
    -   if (R200_DEBUG & RADEON_PIXEL)
    -      fprintf(stderr, "%s: failed\n", __FUNCTION__);
    -
    -   return GL_FALSE;
    -}
    -
    -static GLboolean
    -check_color_per_fragment_ops( const GLcontext *ctx )
    -{
    -   int result;
    -   result = (!(     ctx->Color.AlphaEnabled ||
    -		    ctx->Depth.Test ||
    -		    ctx->Fog.Enabled ||
    -		    ctx->Scissor.Enabled ||
    -		    ctx->Stencil._Enabled ||
    -		    !ctx->Color.ColorMask[0][0] ||
    -		    !ctx->Color.ColorMask[0][1] ||
    -		    !ctx->Color.ColorMask[0][2] ||
    -		    !ctx->Color.ColorMask[0][3] ||
    -		    ctx->Color.ColorLogicOpEnabled ||
    -		    ctx->Texture._EnabledUnits
    -           ) &&
    -	   ctx->Current.RasterPosValid);
    -
    -   return result;
    -}
    -
    -
    -#if 0
    -static GLboolean
    -clip_pixelrect( const GLcontext *ctx,
    -		const GLframebuffer *buffer,
    -		GLint *x, GLint *y,
    -		GLsizei *width, GLsizei *height,
    -		GLint *size )
    -{
    -   r200ContextPtr rmesa = R200_CONTEXT(ctx);
    -
    -   /* left clipping */
    -   if (*x < buffer->_Xmin) {
    -      *width -= (buffer->_Xmin - *x);
    -      *x = buffer->_Xmin;
    -   }
    -
    -   /* right clipping */
    -   if (*x + *width > buffer->_Xmax)
    -      *width -= (*x + *width - buffer->_Xmax - 1);
    -
    -   if (*width <= 0)
    -      return GL_FALSE;
    -
    -   /* bottom clipping */
    -   if (*y < buffer->_Ymin) {
    -      *height -= (buffer->_Ymin - *y);
    -      *y = buffer->_Ymin;
    -   }
    -
    -   /* top clipping */
    -   if (*y + *height > buffer->_Ymax)
    -      *height -= (*y + *height - buffer->_Ymax - 1);
    -
    -   if (*height <= 0)
    -      return GL_FALSE;
    -
    -   *size = ((*y + *height - 1) * rmesa->radeon.radeonScreen->frontPitch +
    -	    (*x + *width - 1) * rmesa->radeon.radeonScreen->cpp);
    -
    -   return GL_TRUE;
    -}
    -#endif
    -
    -static GLboolean
    -r200TryReadPixels( GLcontext *ctx,
    -		  GLint x, GLint y, GLsizei width, GLsizei height,
    -		  GLenum format, GLenum type,
    -		  const struct gl_pixelstore_attrib *pack,
    -		  GLvoid *pixels )
    -{
    -   return GL_FALSE;
    -#if 0
    -   r200ContextPtr rmesa = R200_CONTEXT(ctx);
    -   GLint pitch = pack->RowLength ? pack->RowLength : width;
    -   GLint blit_format;
    -   GLuint cpp = rmesa->radeon.radeonScreen->cpp;
    -   GLint size = width * height * cpp;
    -
    -   if (R200_DEBUG & RADEON_PIXEL)
    -      fprintf(stderr, "%s\n", __FUNCTION__);
    -
    -   /* Only accelerate reading to GART buffers.
    -    */
    -   if ( !r200IsGartMemory(rmesa, pixels,
    -			 pitch * height * rmesa->radeon.radeonScreen->cpp ) ) {
    -      if (R200_DEBUG & RADEON_PIXEL)
    -	 fprintf(stderr, "%s: dest not GART\n", __FUNCTION__);
    -   }
    -
    -   /* Need GL_PACK_INVERT_MESA to cope with upsidedown results from
    -    * blitter:
    -    */
    -   if (!pack->Invert) {
    -      if (R200_DEBUG & RADEON_PIXEL)
    -	 fprintf(stderr, "%s: MESA_PACK_INVERT not set\n", __FUNCTION__);
    -      return GL_FALSE;
    -   }
    -
    -   if (!check_color(ctx, type, format, pack, pixels, size, pitch))
    -      return GL_FALSE;
    -
    -   switch ( rmesa->radeon.radeonScreen->cpp ) {
    -   case 4:
    -      blit_format = R200_CP_COLOR_FORMAT_ARGB8888;
    -      break;
    -   default:
    -      return GL_FALSE;
    -   }
    -
    -
    -   /* Although the blits go on the command buffer, need to do this and
    -    * fire with lock held to guarentee cliprects and drawOffset are
    -    * correct.
    -    *
    -    * This is an unusual situation however, as the code which flushes
    -    * a full command buffer expects to be called unlocked.  As a
    -    * workaround, immediately flush the buffer on aquiring the lock.
    -    */
    -   LOCK_HARDWARE( &rmesa->radeon );
    -
    -   if (rmesa->store.cmd_used)
    -      rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
    -
    -   if (!clip_pixelrect(ctx, ctx->ReadBuffer, &x, &y, &width, &height,
    -		       &size)) {
    -      UNLOCK_HARDWARE( &rmesa->radeon );
    -      if (R200_DEBUG & RADEON_PIXEL)
    -	 fprintf(stderr, "%s totally clipped -- nothing to do\n",
    -		 __FUNCTION__);
    -      return GL_TRUE;
    -   }
    -
    -   {
    -      __DRIdrawable *dPriv = rmesa->radeon.dri.drawable;
    -      driRenderbuffer *drb = (driRenderbuffer *) ctx->ReadBuffer->_ColorReadBuffer;
    -      int nbox = dPriv->numClipRects;
    -      int src_offset = drb->offset
    -		     + rmesa->radeon.radeonScreen->fbLocation;
    -      int src_pitch = drb->pitch * drb->cpp;
    -      int dst_offset = r200GartOffsetFromVirtual( rmesa, pixels );
    -      int dst_pitch = pitch * rmesa->radeon.radeonScreen->cpp;
    -      drm_clip_rect_t *box = dPriv->pClipRects;
    -      int i;
    -
    -      r200EmitWait( rmesa, RADEON_WAIT_3D );
    -
    -      y = dPriv->h - y - height;
    -      x += dPriv->x;
    -      y += dPriv->y;
    -
    -
    -      if (R200_DEBUG & RADEON_PIXEL)
    -	 fprintf(stderr, "readpixel blit src_pitch %d dst_pitch %d\n",
    -		 src_pitch, dst_pitch);
    -
    -      for (i = 0 ; i < nbox ; i++)
    -      {
    -	 GLint bx = box[i].x1;
    -	 GLint by = box[i].y1;
    -	 GLint bw = box[i].x2 - bx;
    -	 GLint bh = box[i].y2 - by;
    -
    -	 if (bx < x) bw -= x - bx, bx = x;
    -	 if (by < y) bh -= y - by, by = y;
    -	 if (bx + bw > x + width) bw = x + width - bx;
    -	 if (by + bh > y + height) bh = y + height - by;
    -	 if (bw <= 0) continue;
    -	 if (bh <= 0) continue;
    -
    -	 r200EmitBlit( rmesa,
    -		       blit_format,
    -		       src_pitch, src_offset,
    -		       dst_pitch, dst_offset,
    -		       bx, by,
    -		       bx - x, by - y,
    -		       bw, bh );
    -      }
    -
    -      rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
    -   }
    -   UNLOCK_HARDWARE( &rmesa->radeon );
    -
    -   radeonFinish( ctx ); /* required by GL */
    -#endif
    -   return GL_TRUE;
    -}
    -
    -static void
    -r200ReadPixels( GLcontext *ctx,
    -		 GLint x, GLint y, GLsizei width, GLsizei height,
    -		 GLenum format, GLenum type,
    -		 const struct gl_pixelstore_attrib *pack,
    -		 GLvoid *pixels )
    -{
    -   if (R200_DEBUG & RADEON_PIXEL)
    -      fprintf(stderr, "%s\n", __FUNCTION__);
    -
    -   if (!r200TryReadPixels( ctx, x, y, width, height, format, type, pack,
    -			   pixels))
    -      _swrast_ReadPixels( ctx, x, y, width, height, format, type, pack,
    -			  pixels);
    -}
    -
    -
    -
    -
    -static void do_draw_pix( GLcontext *ctx,
    -			 GLint x, GLint y, GLsizei width, GLsizei height,
    -			 GLint pitch,
    -			 const void *pixels,
    -			 GLuint planemask)
    -{
    -   if (R200_DEBUG & RADEON_PIXEL)
    -      fprintf(stderr, "%s\n", __FUNCTION__);
    -
    -#if 0
    -   r200ContextPtr rmesa = R200_CONTEXT(ctx);
    -   __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
    -   drm_clip_rect_t *box = dPriv->pClipRects;
    -   struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorDrawBuffers[0];
    -   driRenderbuffer *drb = (driRenderbuffer *) rb;
    -   int nbox = dPriv->numClipRects;
    -   int i;
    -   int blit_format;
    -   int size;
    -   int src_offset = r200GartOffsetFromVirtual( rmesa, pixels );
    -   int src_pitch = pitch * rmesa->radeon.radeonScreen->cpp;
    -
    -   switch ( rmesa->radeon.radeonScreen->cpp ) {
    -   case 2:
    -      blit_format = R200_CP_COLOR_FORMAT_RGB565;
    -      break;
    -   case 4:
    -      blit_format = R200_CP_COLOR_FORMAT_ARGB8888;
    -      break;
    -   default:
    -      return;
    -   }
    -
    -
    -   LOCK_HARDWARE( &rmesa->radeon );
    -
    -   if (rmesa->store.cmd_used)
    -      rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
    -
    -   y -= height;			/* cope with pixel zoom */
    -
    -   if (!clip_pixelrect(ctx, ctx->DrawBuffer,
    -		       &x, &y, &width, &height,
    -		       &size)) {
    -      UNLOCK_HARDWARE( &rmesa->radeon );
    -      return;
    -   }
    -
    -   y = dPriv->h - y - height; 	/* convert from gl to hardware coords */
    -   x += dPriv->x;
    -   y += dPriv->y;
    -
    -
    -   r200EmitWait( rmesa, RADEON_WAIT_3D );
    -
    -   for (i = 0 ; i < nbox ; i++ )
    -   {
    -      GLint bx = box[i].x1;
    -      GLint by = box[i].y1;
    -      GLint bw = box[i].x2 - bx;
    -      GLint bh = box[i].y2 - by;
    -
    -      if (bx < x) bw -= x - bx, bx = x;
    -      if (by < y) bh -= y - by, by = y;
    -      if (bx + bw > x + width) bw = x + width - bx;
    -      if (by + bh > y + height) bh = y + height - by;
    -      if (bw <= 0) continue;
    -      if (bh <= 0) continue;
    -
    -      r200EmitBlit( rmesa,
    -		    blit_format,
    -		    src_pitch, src_offset,
    -		    drb->pitch * drb->cpp,
    -		    drb->offset + rmesa->radeon.radeonScreen->fbLocation,
    -		    bx - x, by - y,
    -		    bx, by,
    -		    bw, bh );
    -   }
    -
    -   rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
    -   radeonWaitForIdleLocked( &rmesa->radeon ); /* required by GL */
    -   UNLOCK_HARDWARE( &rmesa->radeon );
    -#endif
    -}
    -
    -
    -
    -
    -static GLboolean
    -r200TryDrawPixels( GLcontext *ctx,
    -		  GLint x, GLint y, GLsizei width, GLsizei height,
    -		  GLenum format, GLenum type,
    -		  const struct gl_pixelstore_attrib *unpack,
    -		  const GLvoid *pixels )
    -{
    -   r200ContextPtr rmesa = R200_CONTEXT(ctx);
    -   GLint pitch = unpack->RowLength ? unpack->RowLength : width;
    -   GLuint planemask;
    -   GLuint cpp = rmesa->radeon.radeonScreen->cpp;
    -   GLint size = height * pitch * cpp;
    -
    -   if (R200_DEBUG & RADEON_PIXEL)
    -      fprintf(stderr, "%s\n", __FUNCTION__);
    -
    -   /* check that we're drawing to exactly one color buffer */
    -   if (ctx->DrawBuffer->_NumColorDrawBuffers != 1)
    -     return GL_FALSE;
    -
    -   switch (format) {
    -   case GL_RGB:
    -   case GL_RGBA:
    -   case GL_BGRA:
    -      planemask = radeonPackColor(cpp,
    -				ctx->Color.ColorMask[0][RCOMP],
    -				ctx->Color.ColorMask[0][GCOMP],
    -				ctx->Color.ColorMask[0][BCOMP],
    -				ctx->Color.ColorMask[0][ACOMP]);
    -
    -      if (cpp == 2)
    -	 planemask |= planemask << 16;
    -
    -      if (planemask != ~0)
    -	 return GL_FALSE;	/* fix me -- should be possible */
    -
    -      /* Can't do conversions on GART reads/draws.
    -       */
    -      if ( !r200IsGartMemory( rmesa, pixels, size ) ) {
    -	 if (R200_DEBUG & RADEON_PIXEL)
    -	    fprintf(stderr, "%s: not GART memory\n", __FUNCTION__);
    -	 return GL_FALSE;
    -      }
    -
    -      if (!check_color(ctx, type, format, unpack, pixels, size, pitch)) {
    -	 return GL_FALSE;
    -      }
    -      if (!check_color_per_fragment_ops(ctx)) {
    -	 return GL_FALSE;
    -      }
    -
    -      if (ctx->Pixel.ZoomX != 1.0F ||
    -	  ctx->Pixel.ZoomY != -1.0F)
    -	 return GL_FALSE;
    -      break;
    -
    -   default:
    -      return GL_FALSE;
    -   }
    -
    -   if (0)// r200IsGartMemory(rmesa, pixels, size) )
    -   {
    -      do_draw_pix( ctx, x, y, width, height, pitch, pixels, planemask );
    -      return GL_TRUE;
    -   }
    -   else if (0)
    -   {
    -      /* Pixels is in regular memory -- get dma buffers and perform
    -       * upload through them.
    -       */
    -   }
    -   else
    -      return GL_FALSE;
    -}
    -
    -static void
    -r200DrawPixels( GLcontext *ctx,
    -		 GLint x, GLint y, GLsizei width, GLsizei height,
    -		 GLenum format, GLenum type,
    -		 const struct gl_pixelstore_attrib *unpack,
    -		 const GLvoid *pixels )
    -{
    -   if (R200_DEBUG & RADEON_PIXEL)
    -      fprintf(stderr, "%s\n", __FUNCTION__);
    -
    -   if (!r200TryDrawPixels( ctx, x, y, width, height, format, type,
    -			  unpack, pixels ))
    -      _swrast_DrawPixels( ctx, x, y, width, height, format, type,
    -			  unpack, pixels );
    -}
    -
    -
    -static void
    -r200Bitmap( GLcontext *ctx, GLint px, GLint py,
    -		  GLsizei width, GLsizei height,
    -		  const struct gl_pixelstore_attrib *unpack,
    -		  const GLubyte *bitmap )
    -{
    -   r200ContextPtr rmesa = R200_CONTEXT(ctx);
    -
    -   if (rmesa->radeon.Fallback)
    -      _swrast_Bitmap( ctx, px, py, width, height, unpack, bitmap );
    -   else
    -      r200PointsBitmap( ctx, px, py, width, height, unpack, bitmap );
    -}
    -
    -
    -
    -void r200InitPixelFuncs( GLcontext *ctx )
    -{
    -   if (!getenv("R200_NO_BLITS")) {
    -      ctx->Driver.ReadPixels = r200ReadPixels;
    -      ctx->Driver.DrawPixels = r200DrawPixels;
    -      if (getenv("R200_HW_BITMAP"))
    -	 ctx->Driver.Bitmap = r200Bitmap;
    -   }
    -}
    diff --git a/src/mesa/drivers/dri/r200/r200_pixel.h b/src/mesa/drivers/dri/r200/r200_pixel.h
    deleted file mode 100644
    index e62aa05d749..00000000000
    --- a/src/mesa/drivers/dri/r200/r200_pixel.h
    +++ /dev/null
    @@ -1,39 +0,0 @@
    -/*
    -Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
    -
    -The Weather Channel (TM) funded Tungsten Graphics to develop the
    -initial release of the Radeon 8500 driver under the XFree86 license.
    -This notice must be preserved.
    -
    -Permission is hereby granted, free of charge, to any person obtaining
    -a copy of this software and associated documentation files (the
    -"Software"), to deal in the Software without restriction, including
    -without limitation the rights to use, copy, modify, merge, publish,
    -distribute, sublicense, and/or sell copies of the Software, and to
    -permit persons to whom the Software is furnished to do so, subject to
    -the following conditions:
    -
    -The above copyright notice and this permission notice (including the
    -next paragraph) shall be included in all copies or substantial
    -portions of the Software.
    -
    -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
    -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
    -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
    -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    -*/
    -
    -/*
    - * Authors:
    - *   Keith Whitwell 
    - */
    -
    -#ifndef __R200_PIXEL_H__
    -#define __R200_PIXEL_H__
    -
    -extern void r200InitPixelFuncs( GLcontext *ctx );
    -
    -#endif
    diff --git a/src/mesa/drivers/dri/r200/r200_reg.h b/src/mesa/drivers/dri/r200/r200_reg.h
    index 59115212cee..e331be223b8 100644
    --- a/src/mesa/drivers/dri/r200/r200_reg.h
    +++ b/src/mesa/drivers/dri/r200/r200_reg.h
    @@ -690,7 +690,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     #       define R200_PVS_CNTL_1_PROGRAM_START_SHIFT   0
     #       define R200_PVS_CNTL_1_POS_END_SHIFT         10
     #       define R200_PVS_CNTL_1_PROGRAM_END_SHIFT     20
    -/* Addresses are relative the the vertex program parameters area. */
    +/* Addresses are relative to the vertex program parameters area. */
     #define R200_VAP_PVS_CNTL_2                 0x22d4
     #       define R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT 0
     #       define R200_PVS_CNTL_2_PARAM_COUNT_SHIFT  16
    @@ -1265,6 +1265,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     #define     R200_TXC_OUTPUT_MASK_G                 (5 << 20)
     #define     R200_TXC_OUTPUT_MASK_B                 (6 << 20)
     #define     R200_TXC_OUTPUT_MASK_NONE              (7 << 20)
    +#define     R200_TXC_OUTPUT_ROTATE_RGB             (0 << 24)
    +#define     R200_TXC_OUTPUT_ROTATE_ARG             (1 << 24)
    +#define     R200_TXC_OUTPUT_ROTATE_GBA             (2 << 24)
    +#define     R200_TXC_OUTPUT_ROTATE_RGA             (3 << 24)
     #define     R200_TXC_REPL_NORMAL                   0
     #define     R200_TXC_REPL_RED                      1
     #define     R200_TXC_REPL_GREEN                    2
    diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c
    index 050e5aa8770..9c2ac05ad6c 100644
    --- a/src/mesa/drivers/dri/r200/r200_state.c
    +++ b/src/mesa/drivers/dri/r200/r200_state.c
    @@ -46,6 +46,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     #include "tnl/tnl.h"
     #include "tnl/t_pipeline.h"
     #include "swrast_setup/swrast_setup.h"
    +#include "drivers/common/meta.h"
     
     #include "radeon_common.h"
     #include "radeon_mipmap_tree.h"
    @@ -2487,7 +2488,7 @@ static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask )
     }
     /* Initialize the driver's state functions.
      */
    -void r200InitStateFuncs( struct dd_function_table *functions )
    +void r200InitStateFuncs( radeonContextPtr radeon, struct dd_function_table *functions )
     {
        functions->UpdateState		= r200InvalidateState;
        functions->LightingSpaceChange	= r200LightingSpaceChange;
    @@ -2495,6 +2496,12 @@ void r200InitStateFuncs( struct dd_function_table *functions )
        functions->DrawBuffer		= radeonDrawBuffer;
        functions->ReadBuffer		= radeonReadBuffer;
     
    +   if (radeon->radeonScreen->kernel_mm) {
    +	   functions->CopyPixels                = _mesa_meta_CopyPixels;
    +	   functions->DrawPixels                = _mesa_meta_DrawPixels;
    +	   functions->ReadPixels                = radeonReadPixels;
    +   }
    +
        functions->AlphaFunc			= r200AlphaFunc;
        functions->BlendColor		= r200BlendColor;
        functions->BlendEquationSeparate	= r200BlendEquationSeparate;
    diff --git a/src/mesa/drivers/dri/r200/r200_state.h b/src/mesa/drivers/dri/r200/r200_state.h
    index 7b9b0c106aa..327ba837e25 100644
    --- a/src/mesa/drivers/dri/r200/r200_state.h
    +++ b/src/mesa/drivers/dri/r200/r200_state.h
    @@ -38,7 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     #include "r200_context.h"
     
     extern void r200InitState( r200ContextPtr rmesa );
    -extern void r200InitStateFuncs( struct dd_function_table *functions );
    +extern void r200InitStateFuncs( radeonContextPtr radeon, struct dd_function_table *functions );
     extern void r200InitTnlFuncs( GLcontext *ctx );
     
     extern void r200UpdateMaterial( GLcontext *ctx );
    diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
    index 364e0ba6b61..cfeb5407e91 100644
    --- a/src/mesa/drivers/dri/r300/r300_context.c
    +++ b/src/mesa/drivers/dri/r300/r300_context.c
    @@ -500,7 +500,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
     
     	_mesa_init_driver_functions(&functions);
     	r300InitIoctlFuncs(&functions);
    -	r300InitStateFuncs(&functions);
    +	r300InitStateFuncs(&r300->radeon, &functions);
     	r300InitTextureFuncs(&r300->radeon, &functions);
     	r300InitShaderFuncs(&functions);
     	radeonInitQueryObjFunctions(&functions);
    diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
    index d18ebab8ff2..ac93563ed9e 100644
    --- a/src/mesa/drivers/dri/r300/r300_reg.h
    +++ b/src/mesa/drivers/dri/r300/r300_reg.h
    @@ -482,7 +482,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
     #       define R300_PVS_FIRST_INST_SHIFT         0
     #       define R300_PVS_XYZW_VALID_INST_SHIFT    10
     #       define R300_PVS_LAST_INST_SHIFT          20
    -/* Addresses are relative the the vertex program parameters area. */
    +/* Addresses are relative to the vertex program parameters area. */
     #define R300_VAP_PVS_CONST_CNTL             0x22D4
     #       define R300_PVS_CONST_BASE_OFFSET_SHIFT  0
     #       define R300_PVS_MAX_CONST_ADDR_SHIFT     16
    @@ -1760,7 +1760,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
      * The destination register index is in FPI1 (color) and FPI3 (alpha)
      * together with enable bits.
      * There are separate enable bits for writing into temporary registers
    - * (DSTC_REG_* /DSTA_REG) and and program output registers (DSTC_OUTPUT_*
    + * (DSTC_REG_* /DSTA_REG) and program output registers (DSTC_OUTPUT_*
      * /DSTA_OUTPUT). You can write to both at once, or not write at all (the
      * same index must be used for both).
      *
    diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
    index 87489412419..749a2464e7c 100644
    --- a/src/mesa/drivers/dri/r300/r300_state.c
    +++ b/src/mesa/drivers/dri/r300/r300_state.c
    @@ -590,7 +590,7 @@ static void r300SetDepthState(GLcontext * ctx)
     					    R500_STENCIL_REFMASK_FRONT_BACK);
     	r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_Z_FUNC_SHIFT);
     
    -	if (ctx->Depth.Test) {
    +	if (ctx->Depth.Test && ctx->DrawBuffer->_DepthBuffer) {
     		r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_ENABLE;
     		if (ctx->Depth.Mask)
     			r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_WRITE_ENABLE;
    @@ -2354,7 +2354,7 @@ static void r300RenderMode(GLcontext * ctx, GLenum mode)
     /**
      * Initialize driver's state callback functions
      */
    -void r300InitStateFuncs(struct dd_function_table *functions)
    +void r300InitStateFuncs(radeonContextPtr radeon, struct dd_function_table *functions)
     {
     
     	functions->UpdateState = r300InvalidateState;
    @@ -2396,9 +2396,11 @@ void r300InitStateFuncs(struct dd_function_table *functions)
     	functions->DrawBuffer = radeonDrawBuffer;
     	functions->ReadBuffer = radeonReadBuffer;
     
    -	functions->CopyPixels = _mesa_meta_CopyPixels;
    -	functions->DrawPixels = _mesa_meta_DrawPixels;
    -	functions->ReadPixels = radeonReadPixels;
    +	if (radeon->radeonScreen->kernel_mm) {
    +		functions->CopyPixels = _mesa_meta_CopyPixels;
    +		functions->DrawPixels = _mesa_meta_DrawPixels;
    +		functions->ReadPixels = radeonReadPixels;
    +	}
     }
     
     void r300InitShaderFunctions(r300ContextPtr r300)
    diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h
    index d46bf9f1796..e70f84f4e4b 100644
    --- a/src/mesa/drivers/dri/r300/r300_state.h
    +++ b/src/mesa/drivers/dri/r300/r300_state.h
    @@ -55,7 +55,7 @@ void r300UpdateDrawBuffer (GLcontext * ctx);
     void r300UpdateShaders (r300ContextPtr rmesa);
     void r300UpdateShaderStates (r300ContextPtr rmesa);
     void r300InitState (r300ContextPtr r300);
    -void r300InitStateFuncs (struct dd_function_table *functions);
    +void r300InitStateFuncs (radeonContextPtr radeon, struct dd_function_table *functions);
     void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, GLuint output_count, GLuint temp_count);
     void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten);
     
    diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
    index 76d5027649e..fddac2f9bdc 100644
    --- a/src/mesa/drivers/dri/r600/r600_context.c
    +++ b/src/mesa/drivers/dri/r600/r600_context.c
    @@ -384,7 +384,7 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
     	 */
     	_mesa_init_driver_functions(&functions);
     
    -	r700InitStateFuncs(&functions);
    +	r700InitStateFuncs(&r600->radeon, &functions);
     	r600InitTextureFuncs(&r600->radeon, &functions);
     	r700InitShaderFuncs(&functions);
     	radeonInitQueryObjFunctions(&functions);
    diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
    index 6f156b54096..2953ffd0288 100644
    --- a/src/mesa/drivers/dri/r600/r700_state.c
    +++ b/src/mesa/drivers/dri/r600/r700_state.c
    @@ -39,6 +39,7 @@
     #include "swrast_setup/swrast_setup.h"
     #include "main/api_arrayelt.h"
     #include "main/framebuffer.h"
    +#include "drivers/common/meta.h"
     
     #include "shader/prog_parameter.h"
     #include "shader/prog_statevars.h"
    @@ -1816,7 +1817,7 @@ void r700InitState(GLcontext * ctx) //-------------------
     
     }
     
    -void r700InitStateFuncs(struct dd_function_table *functions) //-----------------
    +void r700InitStateFuncs(radeonContextPtr radeon, struct dd_function_table *functions)
     {
     	functions->UpdateState = r700InvalidateState;
     	functions->AlphaFunc = r700AlphaFunc;
    @@ -1857,8 +1858,13 @@ void r700InitStateFuncs(struct dd_function_table *functions) //-----------------
     
     	functions->Scissor = radeonScissor;
     
    -	functions->DrawBuffer		= radeonDrawBuffer;
    -	functions->ReadBuffer		= radeonReadBuffer;
    +	functions->DrawBuffer = radeonDrawBuffer;
    +	functions->ReadBuffer = radeonReadBuffer;
     
    +	if (radeon->radeonScreen->kernel_mm) {
    +		functions->CopyPixels = _mesa_meta_CopyPixels;
    +		functions->DrawPixels = _mesa_meta_DrawPixels;
    +		functions->ReadPixels = radeonReadPixels;
    +	}
     }
     
    diff --git a/src/mesa/drivers/dri/r600/r700_state.h b/src/mesa/drivers/dri/r600/r700_state.h
    index 60c6a7f23ca..56885e0b154 100644
    --- a/src/mesa/drivers/dri/r600/r700_state.h
    +++ b/src/mesa/drivers/dri/r600/r700_state.h
    @@ -40,7 +40,7 @@ extern void r700UpdateShaderStates(GLcontext * ctx);
     extern void r700UpdateViewportOffset(GLcontext * ctx);
     
     extern void r700InitState (GLcontext * ctx);
    -extern void r700InitStateFuncs (struct dd_function_table *functions);
    +extern void r700InitStateFuncs (radeonContextPtr radeon, struct dd_function_table *functions);
     
     extern void r700SetScissor(context_t *context);
     
    diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
    index 07e0adc8905..05c65164d60 100644
    --- a/src/mesa/drivers/dri/r600/r700_vertprog.c
    +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
    @@ -42,7 +42,7 @@
     #include "radeon_debug.h"
     #include "r600_context.h"
     #include "r600_cmdbuf.h"
    -#include "shader/programopt.c"
    +#include "shader/programopt.h"
     
     #include "r700_debug.h"
     #include "r700_vertprog.h"
    diff --git a/src/mesa/drivers/dri/radeon/radeon_blit.c b/src/mesa/drivers/dri/radeon/radeon_blit.c
    index e1e1f215508..143822361e1 100644
    --- a/src/mesa/drivers/dri/radeon/radeon_blit.c
    +++ b/src/mesa/drivers/dri/radeon/radeon_blit.c
    @@ -48,6 +48,8 @@ unsigned r100_check_blit(gl_format mesa_format)
         case MESA_FORMAT_ARGB4444:
         case MESA_FORMAT_ARGB1555:
         case MESA_FORMAT_A8:
    +    case MESA_FORMAT_L8:
    +    case MESA_FORMAT_I8:
     	    break;
         default:
     	    return 0;
    @@ -103,6 +105,9 @@ static void inline emit_tx_setup(struct r100_context *r100,
         case MESA_FORMAT_ARGB8888:
     	    txformat |= RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP;
     	    break;
    +    case MESA_FORMAT_RGBA8888:
    +            txformat |= RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP;
    +            break;
         case MESA_FORMAT_XRGB8888:
     	    txformat |= RADEON_TXFORMAT_ARGB8888;
     	    break;
    @@ -116,8 +121,15 @@ static void inline emit_tx_setup(struct r100_context *r100,
     	    txformat |= RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP;
     	    break;
         case MESA_FORMAT_A8:
    +    case MESA_FORMAT_I8:
     	    txformat |= RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP;
     	    break;
    +    case MESA_FORMAT_L8:
    +            txformat |= RADEON_TXFORMAT_I8;
    +            break;
    +    case MESA_FORMAT_AL88:
    +            txformat |= RADEON_TXFORMAT_AI88 | RADEON_TXFORMAT_ALPHA_IN_MAP;
    +            break;
         default:
     	    break;
         }
    @@ -177,6 +189,8 @@ static inline void emit_cb_setup(struct r100_context *r100,
     	    dst_format = RADEON_COLOR_FORMAT_ARGB1555;
     	    break;
         case MESA_FORMAT_A8:
    +    case MESA_FORMAT_L8:
    +    case MESA_FORMAT_I8:
     	    dst_format = RADEON_COLOR_FORMAT_RGB8;
     	    break;
         default:
    diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
    index 78c5f5dd572..7f5fb99fa4f 100644
    --- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
    +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
    @@ -553,6 +553,8 @@ static radeon_mipmap_tree * get_biggest_matching_miptree(radeonTexObj *texObj,
     	}
     
     	if (mtCount == 0) {
    +		free(mtSizes);
    +		free(mts);
     		return NULL;
     	}
     
    diff --git a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
    index 27841938e66..b180c1d9a5c 100644
    --- a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
    +++ b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
    @@ -96,7 +96,7 @@ do_blit_readpixels(GLcontext * ctx,
             return GL_FALSE;
         }
     
    -    if (ctx->_ImageTransferState) {
    +    if (ctx->_ImageTransferState || ctx->Color._LogicOpEnabled) {
             return GL_FALSE;
         }
     
    diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c
    index 0ce97e86972..0afbc19c127 100644
    --- a/src/mesa/drivers/dri/radeon/radeon_state.c
    +++ b/src/mesa/drivers/dri/radeon/radeon_state.c
    @@ -45,6 +45,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     #include "tnl/tnl.h"
     #include "tnl/t_pipeline.h"
     #include "swrast_setup/swrast_setup.h"
    +#include "drivers/common/meta.h"
     
     #include "radeon_context.h"
     #include "radeon_mipmap_tree.h"
    @@ -1900,7 +1901,7 @@ void radeonUploadTexMatrix( r100ContextPtr rmesa,
        So: if we need the q coord in the end (solely determined by the texture
        target, i.e. 2d / 1d / texrect targets) we swap the third and 4th row.
        Additionally, if we don't have texgen but 4 tex coords submitted, we swap
    -   column 3 and 4 (for the 2d / 1d / texrect targets) since the the q coord
    +   column 3 and 4 (for the 2d / 1d / texrect targets) since the q coord
        will get submitted in the "wrong", i.e. 3rd, slot.
        If an app submits 3 coords for 2d targets, we assume it is saving on vertex
        size and using the texture matrix to swap the r and q coords around (ut2k3
    @@ -2248,6 +2249,11 @@ void radeonInitStateFuncs( GLcontext *ctx , GLboolean dri2 )
     
        ctx->Driver.DrawBuffer		= radeonDrawBuffer;
        ctx->Driver.ReadBuffer		= radeonReadBuffer;
    +   if (dri2) {
    +	   ctx->Driver.CopyPixels               = _mesa_meta_CopyPixels;
    +	   ctx->Driver.DrawPixels               = _mesa_meta_DrawPixels;
    +	   ctx->Driver.ReadPixels               = radeonReadPixels;
    +   }
     
        ctx->Driver.AlphaFunc		= radeonAlphaFunc;
        ctx->Driver.BlendEquationSeparate	= radeonBlendEquationSeparate;
    diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
    index a4bb03d5d39..e57d77e7ef2 100644
    --- a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
    +++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
    @@ -52,15 +52,18 @@ do_copy_texsubimage(GLcontext *ctx,
         gl_format dst_mesaformat;
         unsigned src_width;
         unsigned dst_width;
    +    unsigned flip_y;
     
         if (!radeon->vtbl.blit) {
             return GL_FALSE;
         }
     
         if (_mesa_get_format_bits(timg->base.TexFormat, GL_DEPTH_BITS) > 0) {
    -        rrb = radeon_get_depthbuffer(radeon);
    +        rrb = radeon_renderbuffer(ctx->ReadBuffer->_DepthBuffer);
    +        flip_y = ctx->ReadBuffer->Attachment[BUFFER_DEPTH].Type == GL_NONE;
         } else {
    -        rrb = radeon_get_colorbuffer(radeon);
    +        rrb = radeon_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer);
    +        flip_y = ctx->ReadBuffer->Attachment[BUFFER_COLOR0].Type == GL_NONE;
         }
     
         if (!timg->mt) {
    @@ -93,6 +96,10 @@ do_copy_texsubimage(GLcontext *ctx,
         src_bpp = _mesa_get_format_bytes(src_mesaformat);
         dst_bpp = _mesa_get_format_bytes(dst_mesaformat);
         if (!radeon->vtbl.check_blit(dst_mesaformat)) {
    +	    /* depth formats tend to be special */
    +	    if (_mesa_get_format_bits(dst_mesaformat, GL_DEPTH_BITS) > 0)
    +		    return GL_FALSE;
    +
     	    if (src_bpp != dst_bpp)
     		    return GL_FALSE;
     
    @@ -120,7 +127,7 @@ do_copy_texsubimage(GLcontext *ctx,
                                  timg->mt->bo, dst_offset, dst_mesaformat,
                                  timg->mt->levels[level].rowstride / dst_bpp,
                                  dst_width, timg->base.Height,
    -                             dstx, dsty, width, height, 1);
    +                             dstx, dsty, width, height, flip_y);
     }
     
     void
    diff --git a/src/mesa/drivers/windows/gdi/wmesa.c b/src/mesa/drivers/windows/gdi/wmesa.c
    index 91ddc3615a1..22b0c46b4f7 100644
    --- a/src/mesa/drivers/windows/gdi/wmesa.c
    +++ b/src/mesa/drivers/windows/gdi/wmesa.c
    @@ -247,16 +247,6 @@ static void wmesa_flush(GLcontext *ctx)
      * span writing interface, which is very slow for a clear operation.
      */
     
    -/*
    - * Set the color index used to clear the color buffer.
    - */
    -static void clear_index(GLcontext *ctx, GLuint index)
    -{
    -    WMesaContext pwc = wmesa_context(ctx);
    -    /* Note that indexed mode is not supported yet */
    -    pwc->clearColorRef = RGB(0,0,0);
    -}
    -
     /*
      * Set the color used to clear the color buffer.
      */
    @@ -482,7 +472,7 @@ static void write_rgba_span_front(const GLcontext *ctx,
           };
        } BGRA;
        BGRA *bgra, c;
    -   int i;
    +   GLuint i;
     
        if (n < 16) {   // the value 16 is just guessed
           y=FLIP(y);
    @@ -827,9 +817,9 @@ static void read_rgba_span_32(const GLcontext *ctx,
         lpdw = ((LPDWORD)(pwfb->pbPixels + pwfb->ScanWidth * y)) + x;
         for (i=0; i<n; i++) {
     	pixel = lpdw[i];
    -	rgba[i][RCOMP] = (pixel & 0x00ff0000) >> 16;
    -	rgba[i][GCOMP] = (pixel & 0x0000ff00) >> 8;
    -	rgba[i][BCOMP] = (pixel & 0x000000ff);
    +	rgba[i][RCOMP] = (GLubyte)((pixel & 0x00ff0000) >> 16);
    +	rgba[i][GCOMP] = (GLubyte)((pixel & 0x0000ff00) >> 8);
    +	rgba[i][BCOMP] = (GLubyte)(pixel & 0x000000ff);
     	rgba[i][ACOMP] = 255;
         }
     }
    @@ -851,9 +841,9 @@ static void read_rgba_pixels_32(const GLcontext *ctx,
     	GLint y2 = FLIP(y[i]);
     	lpdw = ((LPDWORD)(pwfb->pbPixels + pwfb->ScanWidth * y2)) + x[i];
     	pixel = *lpdw;
    -	rgba[i][RCOMP] = (pixel & 0x00ff0000) >> 16;
    -	rgba[i][GCOMP] = (pixel & 0x0000ff00) >> 8;
    -	rgba[i][BCOMP] = (pixel & 0x000000ff);
    +	rgba[i][RCOMP] = (GLubyte)((pixel & 0x00ff0000) >> 16);
    +	rgba[i][GCOMP] = (GLubyte)((pixel & 0x0000ff00) >> 8);
    +	rgba[i][BCOMP] = (GLubyte)(pixel & 0x000000ff);
     	rgba[i][ACOMP] = 255;
       }
     }
    @@ -1271,7 +1261,7 @@ wmesa_renderbuffer_storage(GLcontext *ctx,
      * on if we're drawing to the front or back color buffer.
      */
     void wmesa_set_renderbuffer_funcs(struct gl_renderbuffer *rb, int pixelformat,
    -                                  BYTE cColorBits, int double_buffer)
    +                                  int cColorBits, int double_buffer)
     {
         if (double_buffer) {
             /* back buffer */
    @@ -1483,7 +1473,6 @@ WMesaContext WMesaCreateContext(HDC hDC,
         functions.GetBufferSize = wmesa_get_buffer_size;
         functions.Flush = wmesa_flush;
         functions.Clear = clear;
    -    functions.ClearIndex = clear_index;
         functions.ClearColor = clear_color;
         functions.ResizeBuffers = wmesa_resize_buffers;
         functions.Viewport = wmesa_viewport;
    diff --git a/src/mesa/drivers/windows/gdi/wmesadef.h b/src/mesa/drivers/windows/gdi/wmesadef.h
    index 83a42e60824..1c0e2451114 100644
    --- a/src/mesa/drivers/windows/gdi/wmesadef.h
    +++ b/src/mesa/drivers/windows/gdi/wmesadef.h
    @@ -27,7 +27,7 @@ struct wmesa_framebuffer
         HDC                 hDC;
         int			pixelformat;
         GLuint		ScanWidth;
    -    BYTE		cColorBits;
    +    int			cColorBits;
         /* back buffer DIB fields */
         HDC                 dib_hDC;
         BITMAPINFO          bmi;
    diff --git a/src/mesa/drivers/x11/xmesa.h b/src/mesa/drivers/x11/xmesa.h
    index 98139af8336..f63626a9702 100644
    --- a/src/mesa/drivers/x11/xmesa.h
    +++ b/src/mesa/drivers/x11/xmesa.h
    @@ -287,7 +287,7 @@ extern void XMesaCopySubBuffer( XMesaBuffer b,
     
     
     /*
    - * Return a pointer to the the Pixmap or XImage being used as the back
    + * Return a pointer to the Pixmap or XImage being used as the back
      * color buffer of an XMesaBuffer.  This function is a way to get "under
      * the hood" of X/Mesa so one can manipulate the back buffer directly.
      * Input:  b - the XMesaBuffer
    diff --git a/src/mesa/drivers/x11/xmesaP.h b/src/mesa/drivers/x11/xmesaP.h
    index 3ffd7661e35..e0a6908228d 100644
    --- a/src/mesa/drivers/x11/xmesaP.h
    +++ b/src/mesa/drivers/x11/xmesaP.h
    @@ -431,7 +431,7 @@ extern const int xmesa_kernel8[DITH_DY * DITH_DX];
      * If pixelformat==PF_HPCR:
      *
      *      HP Color Recovery dithering               (ad@lms.be 30/08/95)
    - *      HP has on it's 8-bit 700-series computers, a feature called
    + *      HP has on its 8-bit 700-series computers, a feature called
      *      'Color Recovery'.  This allows near 24-bit output (so they say).
      *      It is enabled by selecting the 8-bit  TrueColor  visual AND
      *      corresponding  colormap (see tkInitWindow) AND doing some special
    diff --git a/src/mesa/es/state_tracker/st_cb_drawtex.c b/src/mesa/es/state_tracker/st_cb_drawtex.c
    index 0a5cba9d927..f75f4861a2f 100644
    --- a/src/mesa/es/state_tracker/st_cb_drawtex.c
    +++ b/src/mesa/es/state_tracker/st_cb_drawtex.c
    @@ -120,8 +120,11 @@ st_DrawTex(GLcontext *ctx, GLfloat x, GLfloat y, GLfloat z,
        GLboolean emitColor;
        uint semantic_names[2 + MAX_TEXTURE_UNITS];
        uint semantic_indexes[2 + MAX_TEXTURE_UNITS];
    +   struct pipe_vertex_element velements[2 + MAX_TEXTURE_UNITS];
        GLbitfield inputs = VERT_BIT_POS;
     
    +   st_validate_state(st);
    +
        /* determine if we need vertex color */
        if (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_COL0)
           emitColor = GL_TRUE;
    @@ -232,6 +235,7 @@ st_DrawTex(GLcontext *ctx, GLfloat x, GLfloat y, GLfloat z,
     
        cso_save_viewport(cso);
        cso_save_vertex_shader(cso);
    +   cso_save_vertex_elements(cso);
     
        {
           void *vs = lookup_shader(pipe, numAttribs,
    @@ -239,6 +243,14 @@ st_DrawTex(GLcontext *ctx, GLfloat x, GLfloat y, GLfloat z,
           cso_set_vertex_shader_handle(cso, vs);
        }
     
    +   for (i = 0; i < numAttribs; i++) {
    +      velements[i].src_offset = i * 4 * sizeof(float);
    +      velements[i].instance_divisor = 0;
    +      velements[i].vertex_buffer_index = 0;
    +      velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
    +   }
    +   cso_set_vertex_elements(cso, numAttribs, velements);
    +
        /* viewport state: viewport matching window dims */
        {
           const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
    @@ -270,6 +282,7 @@ st_DrawTex(GLcontext *ctx, GLfloat x, GLfloat y, GLfloat z,
        /* restore state */
        cso_restore_viewport(cso);
        cso_restore_vertex_shader(cso);
    +   cso_restore_vertex_elements(cso);
     }
     
     
    diff --git a/src/mesa/glapi/glapi_getproc.c b/src/mesa/glapi/glapi_getproc.c
    index 46b466920b6..295657875dc 100644
    --- a/src/mesa/glapi/glapi_getproc.c
    +++ b/src/mesa/glapi/glapi_getproc.c
    @@ -44,11 +44,14 @@
     #include "glapi/glapioffsets.h"
     
     
    +/**********************************************************************
    + * Static function management.
    + */
    +
    +
     #if !defined(DISPATCH_FUNCTION_SIZE) && !defined(XFree86Server)
     # define NEED_FUNCTION_POINTER
     #endif
    -
    -/* The code in this file is auto-generated with Python */
     #include "glapi/glprocs.h"
     
     
    @@ -57,7 +60,7 @@
      * and return the corresponding glprocs_table_t entry.
      */
     static const glprocs_table_t *
    -find_entry( const char * n )
    +get_static_proc( const char * n )
     {
        GLuint i;
        for (i = 0; static_functions[i].Name_offset >= 0; i++) {
    @@ -83,11 +86,12 @@ find_entry( const char * n )
     static GLint
     get_static_proc_offset(const char *funcName)
     {
    -   const glprocs_table_t * const f = find_entry( funcName );
    -   if (f) {
    -      return f->Offset;
    +   const glprocs_table_t * const f = get_static_proc( funcName );
    +   if (f == NULL) {
    +      return -1;
        }
    -   return -1;
    +
    +   return f->Offset;
     }
     
     
    @@ -100,27 +104,34 @@ get_static_proc_offset(const char *funcName)
     static _glapi_proc
     get_static_proc_address(const char *funcName)
     {
    -   const glprocs_table_t * const f = find_entry( funcName );
    -   if (f) {
    -#if defined(DISPATCH_FUNCTION_SIZE) && defined(GLX_INDIRECT_RENDERING)
    -      return (f->Address == NULL)
    -	 ? get_entrypoint_address(f->Offset)
    -         : f->Address;
    -#elif defined(DISPATCH_FUNCTION_SIZE)
    -      return get_entrypoint_address(f->Offset);
    -#else
    -      return f->Address;
    -#endif
    -   }
    -   else {
    +   const glprocs_table_t * const f = get_static_proc( funcName );
    +   if (f == NULL) {
           return NULL;
        }
    +
    +#if defined(DISPATCH_FUNCTION_SIZE) && defined(GLX_INDIRECT_RENDERING)
    +   return (f->Address == NULL)
    +      ? get_entrypoint_address(f->Offset)
    +      : f->Address;
    +#elif defined(DISPATCH_FUNCTION_SIZE)
    +   return get_entrypoint_address(f->Offset);
    +#else
    +   return f->Address;
    +#endif
    +}
    +
    +#else
    +
    +static _glapi_proc
    +get_static_proc_address(const char *funcName)
    +{
    +   (void) funcName;
    +   return NULL;
     }
     
     #endif /* !defined(XFree86Server) */
     
     
    -
     /**
      * Return the name of the function at the given offset in the dispatch
      * table.  For debugging only.
    @@ -199,6 +210,56 @@ static struct _glapi_function ExtEntryTable[MAX_EXTENSION_FUNCS];
     static GLuint NumExtEntryPoints = 0;
     
     
    +static struct _glapi_function *
    +get_extension_proc(const char *funcName)
    +{
    +   GLuint i;
    +   for (i = 0; i < NumExtEntryPoints; i++) {
    +      if (strcmp(ExtEntryTable[i].name, funcName) == 0) {
    +         return & ExtEntryTable[i];
    +      }
    +   }
    +   return NULL;
    +}
    +
    +
    +static GLint
    +get_extension_proc_offset(const char *funcName)
    +{
    +   const struct _glapi_function * const f = get_extension_proc( funcName );
    +   if (f == NULL) {
    +      return -1;
    +   }
    +
    +   return f->dispatch_offset;
    +}
    +
    +
    +static _glapi_proc
    +get_extension_proc_address(const char *funcName)
    +{
    +   const struct _glapi_function * const f = get_extension_proc( funcName );
    +   if (f == NULL) {
    +      return NULL;
    +   }
    +
    +   return f->dispatch_stub;
    +}
    +
    +
    +static const char *
    +get_extension_proc_name(GLuint offset)
    +{
    +   GLuint i;
    +   for (i = 0; i < NumExtEntryPoints; i++) {
    +      if (ExtEntryTable[i].dispatch_offset == offset) {
    +         return ExtEntryTable[i].name;
    +      }
    +   }
    +   return NULL;
    +}
    +
    +
     /**
      * strdup() is actually not a standard ANSI C or POSIX routine.
      * Irix will not define it if ANSI mode is in effect.
    @@ -232,20 +293,55 @@ static struct _glapi_function *
     add_function_name( const char * funcName )
     {
        struct _glapi_function * entry = NULL;
    -   
    -   if (NumExtEntryPoints < MAX_EXTENSION_FUNCS) {
    -      _glapi_proc entrypoint = generate_entrypoint(~0);
    -      if (entrypoint != NULL) {
    -	 entry = & ExtEntryTable[NumExtEntryPoints];
    +   _glapi_proc entrypoint = NULL;
    +   char * name_dup = NULL;
     
    -	 ExtEntryTable[NumExtEntryPoints].name = str_dup(funcName);
    -	 ExtEntryTable[NumExtEntryPoints].parameter_signature = NULL;
    -	 ExtEntryTable[NumExtEntryPoints].dispatch_offset = ~0;
    -	 ExtEntryTable[NumExtEntryPoints].dispatch_stub = entrypoint;
    -	 NumExtEntryPoints++;
    -      }
    +   if (NumExtEntryPoints >= MAX_EXTENSION_FUNCS)
    +      return NULL;
    +
    +   if (funcName == NULL)
    +      return NULL;
    +
    +   name_dup = str_dup(funcName);
    +   if (name_dup == NULL)
    +      return NULL;
    +
    +   entrypoint = generate_entrypoint(~0);
    +
    +   if (entrypoint == NULL) {
    +      free(name_dup);
    +      return NULL;
        }
     
    +   entry = & ExtEntryTable[NumExtEntryPoints];
    +   NumExtEntryPoints++;
    +
    +   entry->name = name_dup;
    +   entry->parameter_signature = NULL;
    +   entry->dispatch_offset = ~0;
    +   entry->dispatch_stub = entrypoint;
    +
    +   return entry;
    +}
    +
    +
    +static struct _glapi_function *
    +set_entry_info( struct _glapi_function * entry, const char * signature, unsigned offset )
    +{
    +   char * sig_dup = NULL;
    +
    +   if (signature == NULL)
    +      return NULL;
    +
    +   sig_dup = str_dup(signature);
    +   if (sig_dup == NULL)
    +      return NULL;
    +
    +   fill_in_entrypoint_offset(entry->dispatch_stub, offset);
    +
    +   entry->parameter_signature = sig_dup;
    +   entry->dispatch_offset = offset;
    +
        return entry;
     }
     
    @@ -307,88 +403,103 @@ _glapi_add_dispatch( const char * const * function_names,
        struct _glapi_function * entry[8];
        GLboolean is_static[8];
        unsigned i;
    -   unsigned j;
        int offset = ~0;
    -   int new_offset;
     
    +   init_glapi_relocs_once();
     
        (void) memset( is_static, 0, sizeof( is_static ) );
        (void) memset( entry, 0, sizeof( entry ) );
     
    +   /* Find the _single_ dispatch offset for all function names that already
    +    * exist (and have a dispatch offset).
    +    */
    +
        for ( i = 0 ; function_names[i] != NULL ; i++ ) {
    -      /* Do some trivial validation on the name of the function.
    -       */
    +      const char * funcName = function_names[i];
    +      int static_offset;
    +      int extension_offset;
     
    -      if (!function_names[i] || function_names[i][0] != 'g' || function_names[i][1] != 'l')
    +      if (funcName[0] != 'g' || funcName[1] != 'l')
              return -1;
    -   
    -      /* Determine if the named function already exists.  If the function does
    -       * exist, it must have the same parameter signature as the function
    -       * being added.
    -       */
     
    -      new_offset = get_static_proc_offset(function_names[i]);
    -      if (new_offset >= 0) {
    +      /* search built-in functions */
    +      static_offset = get_static_proc_offset(funcName);
    +
    +      if (static_offset >= 0) {
    +
    +	 is_static[i] = GL_TRUE;
    +
     	 /* FIXME: Make sure the parameter signatures match!  How do we get
     	  * FIXME: the parameter signature for static functions?
     	  */
     
    -	 if ( (offset != ~0) && (new_offset != offset) ) {
    +	 if ( (offset != ~0) && (static_offset != offset) ) {
     	    return -1;
     	 }
     
    -	 is_static[i] = GL_TRUE;
    -	 offset = new_offset;
    +	 offset = static_offset;
    +
    +	 continue;
           }
    -   
    -   
    -      for ( j = 0 ; j < NumExtEntryPoints ; j++ ) {
    -	 if (strcmp(ExtEntryTable[j].name, function_names[i]) == 0) {
    -	    /* The offset may be ~0 if the function name was added by
    -	     * glXGetProcAddress but never filled in by the driver.
    -	     */
     
    -	    if (ExtEntryTable[j].dispatch_offset != ~0) {
    -	       if (strcmp(real_sig, ExtEntryTable[j].parameter_signature) 
    -		   != 0) {
    -		  return -1;
    -	       }
    +      /* search added extension functions */
    +      entry[i] = get_extension_proc(funcName);
     
    -	       if ( (offset != ~0) && (ExtEntryTable[j].dispatch_offset != offset) ) {
    -		  return -1;
    -	       }
    +      if (entry[i] != NULL) {
    +	 extension_offset = entry[i]->dispatch_offset;
     
    -	       offset = ExtEntryTable[j].dispatch_offset;
    -	    }
    -	    
    -	    entry[i] = & ExtEntryTable[j];
    -	    break;
    +	 /* The offset may be ~0 if the function name was added by
    +	  * glXGetProcAddress but never filled in by the driver.
    +	  */
    +
    +	 if (extension_offset == ~0) {
    +	    continue;
     	 }
    +
    +	 if (strcmp(real_sig, entry[i]->parameter_signature) != 0) {
    +	    return -1;
    +	 }
    +
    +	 if ( (offset != ~0) && (extension_offset != offset) ) {
    +	    return -1;
    +	 }
    +
    +	 offset = extension_offset;
           }
        }
     
    +   /* If all function names are either new (or with no dispatch offset),
    +    * allocate a new dispatch offset.
    +    */
    +
        if (offset == ~0) {
           offset = next_dynamic_offset;
           next_dynamic_offset++;
        }
     
    -   for ( i = 0 ; function_names[i] != NULL ; i++ ) {
    -      if (! is_static[i] ) {
    -	 if (entry[i] == NULL) {
    -	    entry[i] = add_function_name( function_names[i] );
    -	    if (entry[i] == NULL) {
    -	       /* FIXME: Possible memory leak here.
    -		*/
    -	       return -1;
    -	    }
    -	 }
    +   /* Fill in the dispatch offset for the new function names (and those with
    +    * no dispatch offset).
    +    */
     
    -	 entry[i]->parameter_signature = str_dup(real_sig);
    -	 fill_in_entrypoint_offset(entry[i]->dispatch_stub, offset);
    -	 entry[i]->dispatch_offset = offset;
    +   for ( i = 0 ; function_names[i] != NULL ; i++ ) {
    +      if (is_static[i]) {
    +	 continue;
    +      }
    +
    +      /* generate entrypoints for new function names */
    +      if (entry[i] == NULL) {
    +	 entry[i] = add_function_name( function_names[i] );
    +	 if (entry[i] == NULL) {
    +	    /* FIXME: Possible memory leak here. */
    +	    return -1;
    +	 }
    +      }
    +
    +      if (entry[i]->dispatch_offset == ~0) {
    +	 set_entry_info( entry[i], real_sig, offset );
           }
        }
    -   
    +
        return offset;
     }
     
    @@ -399,13 +510,13 @@ _glapi_add_dispatch( const char * const * function_names,
     PUBLIC GLint
     _glapi_get_proc_offset(const char *funcName)
     {
    +   GLint offset;
    +
        /* search extension functions first */
    -   GLuint i;
    -   for (i = 0; i < NumExtEntryPoints; i++) {
    -      if (strcmp(ExtEntryTable[i].name, funcName) == 0) {
    -         return ExtEntryTable[i].dispatch_offset;
    -      }
    -   }
    +   offset = get_extension_proc_offset(funcName);
    +   if (offset >= 0)
    +      return offset;
    +
        /* search static functions */
        return get_static_proc_offset(funcName);
     }
    @@ -420,8 +531,10 @@ _glapi_get_proc_offset(const char *funcName)
     PUBLIC _glapi_proc
     _glapi_get_proc_address(const char *funcName)
     {
    +   _glapi_proc func;
        struct _glapi_function * entry;
    -   GLuint i;
    +
    +   init_glapi_relocs_once();
     
     #ifdef MANGLE
        /* skip the prefix on the name */
    @@ -433,23 +546,21 @@ _glapi_get_proc_address(const char *funcName)
     #endif
     
        /* search extension functions first */
    -   for (i = 0; i < NumExtEntryPoints; i++) {
    -      if (strcmp(ExtEntryTable[i].name, funcName) == 0) {
    -         return ExtEntryTable[i].dispatch_stub;
    -      }
    -   }
    +   func = get_extension_proc_address(funcName);
    +   if (func)
    +      return func;
     
    -#if !defined( XFree86Server )
        /* search static functions */
    -   {
    -      const _glapi_proc func = get_static_proc_address(funcName);
    -      if (func)
    -         return func;
    -   }
    -#endif /* !defined( XFree86Server ) */
    +   func = get_static_proc_address(funcName);
    +   if (func)
    +      return func;
     
    +   /* generate entrypoint, dispatch offset must be filled in by the driver */
        entry = add_function_name(funcName);
    -   return (entry == NULL) ? NULL : entry->dispatch_stub;
    +   if (entry == NULL)
    +      return NULL;
    +
    +   return entry->dispatch_stub;
     }
     
     
    @@ -461,7 +572,6 @@ _glapi_get_proc_address(const char *funcName)
     const char *
     _glapi_get_proc_name(GLuint offset)
     {
    -   GLuint i;
        const char * n;
     
        /* search built-in functions */
    @@ -471,12 +581,7 @@ _glapi_get_proc_name(GLuint offset)
        }
     
        /* search added extension functions */
    -   for (i = 0; i < NumExtEntryPoints; i++) {
    -      if (ExtEntryTable[i].dispatch_offset == offset) {
    -         return ExtEntryTable[i].name;
    -      }
    -   }
    -   return NULL;
    +   return get_extension_proc_name(offset);
     }
     
     
    diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
    index 197de09b22a..7c02faaa535 100644
    --- a/src/mesa/main/dd.h
    +++ b/src/mesa/main/dd.h
    @@ -1072,7 +1072,7 @@ struct dd_function_table {
      * These are the initial values to be installed into dispatch by
      * mesa.  If the T&L driver wants to modify the dispatch table
      * while installed, it must do so itself.  It would be possible for
    - * the vertexformat to install it's own initial values for these
    + * the vertexformat to install its own initial values for these
      * functions, but this way there is an obvious list of what is
      * expected of the driver.
      *
    diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
    index 14c533e0d43..7c442e390c2 100644
    --- a/src/mesa/main/fbobject.c
    +++ b/src/mesa/main/fbobject.c
    @@ -1549,6 +1549,7 @@ framebuffer_texture(GLcontext *ctx, const char *caller, GLenum target,
           texObj = _mesa_lookup_texture(ctx, texture);
           if (texObj != NULL) {
              if (textarget == 0) {
    +            /* XXX what's the purpose of this? */
                 err = (texObj->Target != GL_TEXTURE_3D) &&
                     (texObj->Target != GL_TEXTURE_1D_ARRAY_EXT) &&
                     (texObj->Target != GL_TEXTURE_2D_ARRAY_EXT);
    @@ -1559,6 +1560,13 @@ framebuffer_texture(GLcontext *ctx, const char *caller, GLenum target,
                     : (texObj->Target != textarget);
              }
           }
    +      else {
    +         /* can't render to a non-existent texture */
    +         _mesa_error(ctx, GL_INVALID_OPERATION,
    +                     "glFramebufferTexture%sEXT(non existant texture)",
    +                     caller);
    +         return;
    +      }
     
           if (err) {
              _mesa_error(ctx, GL_INVALID_OPERATION,
    diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c
    index 6a85162d5da..5a654e5c2a3 100644
    --- a/src/mesa/main/framebuffer.c
    +++ b/src/mesa/main/framebuffer.c
    @@ -986,6 +986,10 @@ _mesa_dest_buffer_exists(GLcontext *ctx, GLenum format)
        return GL_TRUE;
     }
     
    +
    +/**
    + * Used to answer the GL_IMPLEMENTATION_COLOR_READ_FORMAT_OES query.
    + */
     GLenum
     _mesa_get_color_read_format(GLcontext *ctx)
     {
    @@ -999,6 +1003,10 @@ _mesa_get_color_read_format(GLcontext *ctx)
        }
     }
     
    +
    +/**
    + * Used to answer the GL_IMPLEMENTATION_COLOR_READ_TYPE_OES query.
    + */
     GLenum
     _mesa_get_color_read_type(GLcontext *ctx)
     {
    diff --git a/src/mesa/main/texcompress_fxt1.c b/src/mesa/main/texcompress_fxt1.c
    index 149853f7acd..04acf05e528 100644
    --- a/src/mesa/main/texcompress_fxt1.c
    +++ b/src/mesa/main/texcompress_fxt1.c
    @@ -476,7 +476,7 @@ fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
         *     for each sample color
         *         sort to nearest vector.
         *
    -    *     replace each vector with the centroid of it's matching colors.
    +    *     replace each vector with the centroid of its matching colors.
         *
         *     repeat until RMS doesn't improve.
         *
    diff --git a/src/mesa/math/m_debug_util.h b/src/mesa/math/m_debug_util.h
    index 2e67db8e55d..ed11c849ece 100644
    --- a/src/mesa/math/m_debug_util.h
    +++ b/src/mesa/math/m_debug_util.h
    @@ -61,7 +61,7 @@ extern long counter_overhead;
      */
     extern char *mesa_profile;
     
    -/* Modify the the number of tests if you like.
    +/* Modify the number of tests if you like.
      * We take the minimum of all results, because every error should be
      * positive (time used by other processes, task switches etc).
      * It is assumed that all calculations are done in the cache.
    diff --git a/src/mesa/math/m_matrix.c b/src/mesa/math/m_matrix.c
    index ef8a40fbecb..4b33d0bbb37 100644
    --- a/src/mesa/math/m_matrix.c
    +++ b/src/mesa/math/m_matrix.c
    @@ -889,7 +889,7 @@ _math_matrix_rotate( GLmatrix *mat,
            *  Y-axis to bring the axis vector parallel with the X-axis.  The
            *  rotation about the X-axis is then performed.  Ry and Rz are
            *  simply the respective inverse transforms to bring the arbitrary
    -       *  axis back to it's original orientation.  The first transforms
    +       *  axis back to its original orientation.  The first transforms
            *  Rz' and Ry' are considered inverses, since the data from the
            *  arbitrary axis gives you info on how to get to it, not how
            *  to get away from it, and an inverse must be applied.
    diff --git a/src/mesa/shader/lex.yy.c b/src/mesa/shader/lex.yy.c
    index d1af35fedb6..a08617ff8d4 100644
    --- a/src/mesa/shader/lex.yy.c
    +++ b/src/mesa/shader/lex.yy.c
    @@ -53,6 +53,7 @@ typedef int flex_int32_t;
     typedef unsigned char flex_uint8_t; 
     typedef unsigned short int flex_uint16_t;
     typedef unsigned int flex_uint32_t;
    +#endif /* ! C99 */
     
     /* Limits of integral types. */
     #ifndef INT8_MIN
    @@ -83,8 +84,6 @@ typedef unsigned int flex_uint32_t;
     #define UINT32_MAX             (4294967295U)
     #endif
     
    -#endif /* ! C99 */
    -
     #endif /* ! FLEXINT_H */
     
     #ifdef __cplusplus
    @@ -158,15 +157,7 @@ typedef void* yyscan_t;
     
     /* Size of default input buffer. */
     #ifndef YY_BUF_SIZE
    -#ifdef __ia64__
    -/* On IA-64, the buffer size is 16k, not 8k.
    - * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
    - * Ditto for the __ia64__ case accordingly.
    - */
    -#define YY_BUF_SIZE 32768
    -#else
     #define YY_BUF_SIZE 16384
    -#endif /* __ia64__ */
     #endif
     
     /* The state buf must be large enough to hold one state per character in the main buffer.
    @@ -1161,7 +1152,7 @@ handle_ident(struct asm_parser_state *state, const char *text, YYSTYPE *lval)
        } while(0);
     
     #define YY_EXTRA_TYPE struct asm_parser_state *
    -#line 1165 "lex.yy.c"
    +#line 1156 "lex.yy.c"
     
     #define INITIAL 0
     
    @@ -1298,12 +1289,7 @@ static int input (yyscan_t yyscanner );
     
     /* Amount of stuff to slurp up with each read. */
     #ifndef YY_READ_BUF_SIZE
    -#ifdef __ia64__
    -/* On IA-64, the buffer size is 16k, not 8k */
    -#define YY_READ_BUF_SIZE 16384
    -#else
     #define YY_READ_BUF_SIZE 8192
    -#endif /* __ia64__ */
     #endif
     
     /* Copy whatever the last rule matched to the standard output. */
    @@ -1311,7 +1297,7 @@ static int input (yyscan_t yyscanner );
     /* This used to be an fputs(), but since the string might contain NUL's,
      * we now use fwrite().
      */
    -#define ECHO do { if (fwrite( yytext, yyleng, 1, yyout )) {} } while (0)
    +#define ECHO fwrite( yytext, yyleng, 1, yyout )
     #endif
     
     /* Gets input and stuffs it into "buf".  number of characters read, or YY_NULL,
    @@ -1322,7 +1308,7 @@ static int input (yyscan_t yyscanner );
     	if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
     		{ \
     		int c = '*'; \
    -		size_t n; \
    +		unsigned n; \
     		for ( n = 0; n < max_size && \
     			     (c = getc( yyin )) != EOF && c != '\n'; ++n ) \
     			buf[n] = (char) c; \
    @@ -1410,7 +1396,7 @@ YY_DECL
     #line 157 "program_lexer.l"
     
     
    -#line 1414 "lex.yy.c"
    +#line 1400 "lex.yy.c"
     
         yylval = yylval_param;
     
    @@ -2212,7 +2198,7 @@ case 142:
     YY_RULE_SETUP
     #line 326 "program_lexer.l"
     {
    -   yylval->real = _mesa_strtod(yytext, NULL);
    +   yylval->real = (float) _mesa_strtod(yytext, NULL);
        return REAL;
     }
     	YY_BREAK
    @@ -2224,7 +2210,7 @@ YY_DO_BEFORE_ACTION; /* set up yytext again */
     YY_RULE_SETUP
     #line 330 "program_lexer.l"
     {
    -   yylval->real = _mesa_strtod(yytext, NULL);
    +   yylval->real = (float) _mesa_strtod(yytext, NULL);
        return REAL;
     }
     	YY_BREAK
    @@ -2232,7 +2218,7 @@ case 144:
     YY_RULE_SETUP
     #line 334 "program_lexer.l"
     {
    -   yylval->real = _mesa_strtod(yytext, NULL);
    +   yylval->real = (float) _mesa_strtod(yytext, NULL);
        return REAL;
     }
     	YY_BREAK
    @@ -2240,7 +2226,7 @@ case 145:
     YY_RULE_SETUP
     #line 338 "program_lexer.l"
     {
    -   yylval->real = _mesa_strtod(yytext, NULL);
    +   yylval->real = (float) _mesa_strtod(yytext, NULL);
        return REAL;
     }
     	YY_BREAK
    @@ -2474,7 +2460,7 @@ YY_RULE_SETUP
     #line 481 "program_lexer.l"
     ECHO;
     	YY_BREAK
    -#line 2478 "lex.yy.c"
    +#line 2464 "lex.yy.c"
     case YY_STATE_EOF(INITIAL):
     	yyterminate();
     
    @@ -3242,8 +3228,8 @@ YY_BUFFER_STATE yy_scan_string (yyconst char * yystr , yyscan_t yyscanner)
     
     /** Setup the input buffer state to scan the given bytes. The next call to yylex() will
      * scan from a @e copy of @a bytes.
    - * @param yybytes the byte buffer to scan
    - * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes.
    + * @param bytes the byte buffer to scan
    + * @param len the number of bytes in the buffer pointed to by @a bytes.
      * @param yyscanner The scanner object.
      * @return the newly allocated buffer state object.
      */
    diff --git a/src/mesa/shader/prog_instruction.h b/src/mesa/shader/prog_instruction.h
    index 224350caac6..28c797a4ba8 100644
    --- a/src/mesa/shader/prog_instruction.h
    +++ b/src/mesa/shader/prog_instruction.h
    @@ -97,8 +97,8 @@
     #define COND_EQ  2  /**< equal to zero */
     #define COND_LT  3  /**< less than zero */
     #define COND_UN  4  /**< unordered (NaN) */
    -#define COND_GE  5  /**< greater then or equal to zero */
    -#define COND_LE  6  /**< less then or equal to zero */
    +#define COND_GE  5  /**< greater than or equal to zero */
    +#define COND_LE  6  /**< less than or equal to zero */
     #define COND_NE  7  /**< not equal to zero */
     #define COND_TR  8  /**< always true */
     #define COND_FL  9  /**< always false */
    diff --git a/src/mesa/shader/program_lexer.l b/src/mesa/shader/program_lexer.l
    index 83bc5089d9e..b00765793dc 100644
    --- a/src/mesa/shader/program_lexer.l
    +++ b/src/mesa/shader/program_lexer.l
    @@ -324,19 +324,19 @@ ARRAYSHADOW2D             { return_token_or_IDENTIFIER(require_ARB_fp && require
        return INTEGER;
     }
     {num}?{frac}{exp}?        {
    -   yylval->real = _mesa_strtod(yytext, NULL);
    +   yylval->real = (float) _mesa_strtod(yytext, NULL);
        return REAL;
     }
     {num}"."/[^.]             {
    -   yylval->real = _mesa_strtod(yytext, NULL);
    +   yylval->real = (float) _mesa_strtod(yytext, NULL);
        return REAL;
     }
     {num}{exp}                {
    -   yylval->real = _mesa_strtod(yytext, NULL);
    +   yylval->real = (float) _mesa_strtod(yytext, NULL);
        return REAL;
     }
     {num}"."{exp}             {
    -   yylval->real = _mesa_strtod(yytext, NULL);
    +   yylval->real = (float) _mesa_strtod(yytext, NULL);
        return REAL;
     }
     
    diff --git a/src/mesa/shader/program_parser.h b/src/mesa/shader/program_parser.h
    index 730466c30f5..be952d4b9c8 100644
    --- a/src/mesa/shader/program_parser.h
    +++ b/src/mesa/shader/program_parser.h
    @@ -62,7 +62,7 @@ struct asm_symbol {
         */
        unsigned param_binding_swizzle;
     
    -   /* This is how many entries in the the program_parameter_list we take up
    +   /* This is how many entries in the program_parameter_list we take up
         * with our state tokens or constants. Note that this is _not_ the same as
         * the number of param registers we eventually use.
         */
    diff --git a/src/mesa/shader/slang/library/slang_common_builtin.gc b/src/mesa/shader/slang/library/slang_common_builtin.gc
    index 8b7771c2846..a25ca55bc42 100644
    --- a/src/mesa/shader/slang/library/slang_common_builtin.gc
    +++ b/src/mesa/shader/slang/library/slang_common_builtin.gc
    @@ -695,7 +695,7 @@ vec3 normalize(const vec3 v)
     {
     //   const float s = inversesqrt(dot(v, v));
     //   __retVal = v * s;
    -// XXX note, we _could_ use __retVal.w instead of tmp and and save a
    +// XXX note, we _could_ use __retVal.w instead of tmp and save a
     // register, but that's actually a compilation error because v is a vec3
     // and the .w suffix is illegal.  Oh well.
        float tmp;
    diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c
    index e766b3a9038..03e33361448 100644
    --- a/src/mesa/state_tracker/st_atom_pixeltransfer.c
    +++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c
    @@ -138,7 +138,6 @@ static void
     load_color_map_texture(GLcontext *ctx, struct pipe_texture *pt)
     {
        struct pipe_context *pipe = ctx->st->pipe;
    -   struct pipe_screen *screen = pipe->screen;
        struct pipe_transfer *transfer;
        const GLuint rSize = ctx->PixelMaps.RtoR.Size;
        const GLuint gSize = ctx->PixelMaps.GtoG.Size;
    @@ -151,7 +150,7 @@ load_color_map_texture(GLcontext *ctx, struct pipe_texture *pt)
        transfer = st_cond_flush_get_tex_transfer(st_context(ctx),
     					     pt, 0, 0, 0, PIPE_TRANSFER_WRITE,
     					     0, 0, texSize, texSize);
    -   dest = (uint *) screen->transfer_map(screen, transfer);
    +   dest = (uint *) pipe->transfer_map(pipe, transfer);
     
        /* Pack four 1D maps into a 2D texture:
         * R map is placed horizontally, indexed by S, in channel 0
    @@ -172,8 +171,8 @@ load_color_map_texture(GLcontext *ctx, struct pipe_texture *pt)
           }
        }
     
    -   screen->transfer_unmap(screen, transfer);
    -   screen->tex_transfer_destroy(transfer);
    +   pipe->transfer_unmap(pipe, transfer);
    +   pipe->tex_transfer_destroy(pipe, transfer);
     }
     
     
    diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c
    index 33e43ddcc4c..01aba3e3dd4 100644
    --- a/src/mesa/state_tracker/st_cb_accum.c
    +++ b/src/mesa/state_tracker/st_cb_accum.c
    @@ -129,7 +129,6 @@ accum_accum(struct st_context *st, GLfloat value,
                 struct st_renderbuffer *color_strb)
     {
        struct pipe_context *pipe = st->pipe;
    -   struct pipe_screen *screen = pipe->screen;
        struct pipe_transfer *color_trans;
        size_t stride = acc_strb->stride;
        GLubyte *data = acc_strb->data;
    @@ -145,7 +144,7 @@ accum_accum(struct st_context *st, GLfloat value,
     
        buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat));
     
    -   pipe_get_tile_rgba(color_trans, 0, 0, width, height, buf);
    +   pipe_get_tile_rgba(pipe, color_trans, 0, 0, width, height, buf);
     
        switch (acc_strb->format) {
        case PIPE_FORMAT_R16G16B16A16_SNORM:
    @@ -166,7 +165,7 @@ accum_accum(struct st_context *st, GLfloat value,
        }
     
        free(buf);
    -   screen->tex_transfer_destroy(color_trans);
    +   pipe->tex_transfer_destroy(pipe, color_trans);
     }
     
     
    @@ -177,7 +176,6 @@ accum_load(struct st_context *st, GLfloat value,
                struct st_renderbuffer *color_strb)
     {
        struct pipe_context *pipe = st->pipe;
    -   struct pipe_screen *screen = pipe->screen;
        struct pipe_transfer *color_trans;
        size_t stride = acc_strb->stride;
        GLubyte *data = acc_strb->data;
    @@ -194,7 +192,7 @@ accum_load(struct st_context *st, GLfloat value,
     
        buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat));
     
    -   pipe_get_tile_rgba(color_trans, 0, 0, width, height, buf);
    +   pipe_get_tile_rgba(pipe, color_trans, 0, 0, width, height, buf);
     
        switch (acc_strb->format) {
        case PIPE_FORMAT_R16G16B16A16_SNORM:
    @@ -215,7 +213,7 @@ accum_load(struct st_context *st, GLfloat value,
        }
     
        free(buf);
    -   screen->tex_transfer_destroy(color_trans);
    +   pipe->tex_transfer_destroy(pipe, color_trans);
     }
     
     
    @@ -226,7 +224,6 @@ accum_return(GLcontext *ctx, GLfloat value,
                  struct st_renderbuffer *color_strb)
     {
        struct pipe_context *pipe = ctx->st->pipe;
    -   struct pipe_screen *screen = pipe->screen;
        const GLubyte *colormask = ctx->Color.ColorMask[0];
        enum pipe_transfer_usage usage;
        struct pipe_transfer *color_trans;
    @@ -251,7 +248,7 @@ accum_return(GLcontext *ctx, GLfloat value,
     						width, height);
     
        if (usage & PIPE_TRANSFER_READ)
    -      pipe_get_tile_rgba(color_trans, 0, 0, width, height, buf);
    +      pipe_get_tile_rgba(pipe, color_trans, 0, 0, width, height, buf);
     
        switch (acc_strb->format) {
        case PIPE_FORMAT_R16G16B16A16_SNORM:
    @@ -280,10 +277,10 @@ accum_return(GLcontext *ctx, GLfloat value,
           _mesa_problem(NULL, "unexpected format in st_clear_accum_buffer()");
        }
     
    -   pipe_put_tile_rgba(color_trans, 0, 0, width, height, buf);
    +   pipe_put_tile_rgba(pipe, color_trans, 0, 0, width, height, buf);
     
        free(buf);
    -   screen->tex_transfer_destroy(color_trans);
    +   pipe->tex_transfer_destroy(pipe, color_trans);
     }
     
     
    diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
    index 25d33b933e3..9a0446bb710 100644
    --- a/src/mesa/state_tracker/st_cb_bitmap.c
    +++ b/src/mesa/state_tracker/st_cb_bitmap.c
    @@ -259,7 +259,6 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height,
                         const GLubyte *bitmap)
     {
        struct pipe_context *pipe = ctx->st->pipe;
    -   struct pipe_screen *screen = pipe->screen;
        struct pipe_transfer *transfer;
        ubyte *dest;
        struct pipe_texture *pt;
    @@ -285,7 +284,7 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height,
     					   PIPE_TRANSFER_WRITE,
     					   0, 0, width, height);
     
    -   dest = screen->transfer_map(screen, transfer);
    +   dest = pipe->transfer_map(pipe, transfer);
     
        /* Put image into texture transfer */
        memset(dest, 0xff, height * transfer->stride);
    @@ -295,8 +294,8 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height,
        _mesa_unmap_pbo_source(ctx, unpack);
     
        /* Release transfer */
    -   screen->transfer_unmap(screen, transfer);
    -   screen->tex_transfer_destroy(transfer);
    +   pipe->transfer_unmap(pipe, transfer);
    +   pipe->tex_transfer_destroy(pipe, transfer);
     
        return pt;
     }
    @@ -520,7 +519,6 @@ static void
     reset_cache(struct st_context *st)
     {
        struct pipe_context *pipe = st->pipe;
    -   struct pipe_screen *screen = pipe->screen;
        struct bitmap_cache *cache = st->bitmap.cache;
     
        /*memset(cache->buffer, 0xff, sizeof(cache->buffer));*/
    @@ -532,7 +530,7 @@ reset_cache(struct st_context *st)
        cache->ymax = -1000000;
     
        if (cache->trans) {
    -      screen->tex_transfer_destroy(cache->trans);
    +      pipe->tex_transfer_destroy(pipe, cache->trans);
           cache->trans = NULL;
        }
     
    @@ -570,7 +568,6 @@ static void
     create_cache_trans(struct st_context *st)
     {
        struct pipe_context *pipe = st->pipe;
    -   struct pipe_screen *screen = pipe->screen;
        struct bitmap_cache *cache = st->bitmap.cache;
     
        if (cache->trans)
    @@ -583,7 +580,7 @@ create_cache_trans(struct st_context *st)
     					       PIPE_TRANSFER_WRITE, 0, 0,
     					       BITMAP_CACHE_WIDTH,
     					       BITMAP_CACHE_HEIGHT);
    -   cache->buffer = screen->transfer_map(screen, cache->trans);
    +   cache->buffer = pipe->transfer_map(pipe, cache->trans);
     
        /* init image to all 0xff */
        memset(cache->buffer, 0xff, cache->trans->stride * BITMAP_CACHE_HEIGHT);
    @@ -601,7 +598,6 @@ st_flush_bitmap_cache(struct st_context *st)
     
           if (st->ctx->DrawBuffer) {
              struct pipe_context *pipe = st->pipe;
    -         struct pipe_screen *screen = pipe->screen;
              struct pipe_sampler_view *sv;
     
              assert(cache->xmin <= cache->xmax);
    @@ -618,10 +614,10 @@ st_flush_bitmap_cache(struct st_context *st)
              if (cache->trans) {
                 if (0)
                    print_cache(cache);
    -            screen->transfer_unmap(screen, cache->trans);
    +            pipe->transfer_unmap(pipe, cache->trans);
                 cache->buffer = NULL;
     
    -            screen->tex_transfer_destroy(cache->trans);
    +            pipe->tex_transfer_destroy(pipe, cache->trans);
                 cache->trans = NULL;
              }
     
    @@ -837,7 +833,6 @@ void
     st_destroy_bitmap(struct st_context *st)
     {
        struct pipe_context *pipe = st->pipe;
    -   struct pipe_screen *screen = pipe->screen;
        struct bitmap_cache *cache = st->bitmap.cache;
     
     
    @@ -854,8 +849,8 @@ st_destroy_bitmap(struct st_context *st)
     
        if (cache) {
           if (cache->trans) {
    -         screen->transfer_unmap(screen, cache->trans);
    -         screen->tex_transfer_destroy(cache->trans);
    +         pipe->transfer_unmap(pipe, cache->trans);
    +         pipe->tex_transfer_destroy(pipe, cache->trans);
           }
           pipe_texture_reference(&st->bitmap.cache->texture, NULL);
           free(st->bitmap.cache);
    diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
    index 236010c5b5d..75be79fd4b0 100644
    --- a/src/mesa/state_tracker/st_cb_drawpixels.c
    +++ b/src/mesa/state_tracker/st_cb_drawpixels.c
    @@ -292,6 +292,51 @@ base_format(GLenum format)
     }
     
     
    +/**
    + * Create a temporary texture to hold an image of the given size.
    + * If width, height are not POT and the driver only handles POT textures,
    + * allocate the next larger size of texture that is POT.
    + */
    +static struct pipe_texture *
    +alloc_texture(struct st_context *st, GLsizei width, GLsizei height,
    +              enum pipe_format texFormat)
    +{
    +   struct pipe_context *pipe = st->pipe;
    +   struct pipe_screen *screen = pipe->screen;
    +   struct pipe_texture *pt;
    +   int ptw, pth;
    +
    +   ptw = width;
    +   pth = height;
    +
    +   /* Need to use POT texture? */
    +   if (!screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES)) {
    +      int l2pt, maxSize;
    +
    +      l2pt = util_logbase2(width);
    +      if (1 << l2pt != width) {
    +         ptw = 1 << (l2pt + 1);
    +      }
    +
    +      l2pt = util_logbase2(height);
    +      if (1 << l2pt != height) {
    +         pth = 1 << (l2pt + 1);
    +      }
    +
    +      /* Check against maximum texture size */
    +      maxSize = 1 << (pipe->screen->get_param(pipe->screen,
    +                               PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1);
    +      assert(ptw <= maxSize);
    +      assert(pth <= maxSize);
    +   }
    +
    +   pt = st_texture_create(st, PIPE_TEXTURE_2D, texFormat, 0,
    +                          ptw, pth, 1, PIPE_TEXTURE_USAGE_SAMPLER);
    +
    +   return pt;
    +}
    +
    +
     /**
      * Make texture containing an image for glDrawPixels image.
      * If 'pixels' is NULL, leave the texture image data undefined.
    @@ -304,13 +349,11 @@ make_texture(struct st_context *st,
     {
        GLcontext *ctx = st->ctx;
        struct pipe_context *pipe = st->pipe;
    -   struct pipe_screen *screen = pipe->screen;
        gl_format mformat;
        struct pipe_texture *pt;
        enum pipe_format pipeFormat;
        GLuint cpp;
        GLenum baseFormat;
    -   int ptw, pth;
     
        baseFormat = base_format(format);
     
    @@ -325,29 +368,8 @@ make_texture(struct st_context *st,
        if (!pixels)
           return NULL;
     
    -   /* Need to use POT texture? */
    -   ptw = width;
    -   pth = height;
    -   if (!screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES)) {
    -      int l2pt, maxSize;
    -
    -      l2pt = util_logbase2(width);
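    -      if (1<<l2pt != width) {
    -         ptw = 1<<(l2pt+1);
    -      }
    -      l2pt = util_logbase2(height);
    -      if (1<<l2pt != height) {
    -         pth = 1<<(l2pt+1);
    -      }
    -
    -      /* Check against maximum texture size */
    -      maxSize = 1 << (pipe->screen->get_param(pipe->screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1);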
    -      assert(ptw <= maxSize);
    -      assert(pth <= maxSize);
    -   }
    -
    -   pt = st_texture_create(st, PIPE_TEXTURE_2D, pipeFormat, 0, ptw, pth, 1,
    -                          PIPE_TEXTURE_USAGE_SAMPLER);
    +   /* alloc temporary texture */
    +   pt = alloc_texture(st, width, height, pipeFormat);
        if (!pt) {
           _mesa_unmap_pbo_source(ctx, unpack);
           return NULL;
    @@ -368,7 +390,7 @@ make_texture(struct st_context *st,
     					      width, height);
     
           /* map texture transfer */
    -      dest = screen->transfer_map(screen, transfer);
    +      dest = pipe->transfer_map(pipe, transfer);
     
     
           /* Put image into texture transfer.
    @@ -388,8 +410,8 @@ make_texture(struct st_context *st,
                                    unpack);
     
           /* unmap */
    -      screen->transfer_unmap(screen, transfer);
    -      screen->tex_transfer_destroy(transfer);
    +      pipe->transfer_unmap(pipe, transfer);
    +      pipe->tex_transfer_destroy(pipe, transfer);
     
           assert(success);
     
    @@ -405,7 +427,7 @@ make_texture(struct st_context *st,
     
     /**
      * Draw quad with texcoords and optional color.
    - * Coords are window coords with y=0=bottom.
    + * Coords are gallium window coords with y=0=top.
      * \param color  may be null
      * \param invertTex  if true, flip texcoords vertically
      */
    @@ -595,10 +617,15 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
           cso_set_fragment_sampler_views(cso, 1, &sv);
        }
     
    -   /* Compute window coords (y=0=bottom) with pixel zoom.
    +   /* Compute Gallium window coords (y=0=top) with pixel zoom.
         * Recall that these coords are transformed by the current
         * vertex shader and viewport transformation.
         */
    +   if (st_fb_orientation(ctx->DrawBuffer) == Y_0_BOTTOM) {
    +      y = ctx->DrawBuffer->Height - (int) (y + height * ctx->Pixel.ZoomY);
    +      invertTex = !invertTex;
    +   }
    +
        x0 = (GLfloat) x;
        x1 = x + width * ctx->Pixel.ZoomX;
        y0 = (GLfloat) y;
    @@ -630,7 +657,6 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y,
     {
        struct st_context *st = st_context(ctx);
        struct pipe_context *pipe = st->pipe;
    -   struct pipe_screen *screen = pipe->screen;
        struct st_renderbuffer *strb;
        enum pipe_transfer_usage usage;
        struct pipe_transfer *pt;
    @@ -664,7 +690,7 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y,
     				       usage, x, y,
     				       width, height);
     
    -   stmap = screen->transfer_map(screen, pt);
    +   stmap = pipe->transfer_map(pipe, pt);
     
        pixels = _mesa_map_pbo_source(ctx, &clippedUnpack, pixels);
        assert(pixels);
    @@ -764,8 +790,8 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y,
        _mesa_unmap_pbo_source(ctx, &clippedUnpack);
     
        /* unmap the stencil buffer */
    -   screen->transfer_unmap(screen, pt);
    -   screen->tex_transfer_destroy(pt);
    +   pipe->transfer_unmap(pipe, pt);
    +   pipe->tex_transfer_destroy(pipe, pt);
     }
     
     
    @@ -833,7 +859,7 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy,
                         GLint dstx, GLint dsty)
     {
        struct st_renderbuffer *rbDraw = st_renderbuffer(ctx->DrawBuffer->_StencilBuffer);
    -   struct pipe_screen *screen = ctx->st->pipe->screen;
    +   struct pipe_context *pipe = ctx->st->pipe;
        enum pipe_transfer_usage usage;
        struct pipe_transfer *ptDraw;
        ubyte *drawMap;
    @@ -869,7 +895,7 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy,
        assert(util_format_get_blockheight(ptDraw->texture->format) == 1);
     
        /* map the stencil buffer */
    -   drawMap = screen->transfer_map(screen, ptDraw);
    +   drawMap = pipe->transfer_map(pipe, ptDraw);
     
        /* draw */
        /* XXX PixelZoom not handled yet */
    @@ -922,8 +948,8 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy,
        free(buffer);
     
        /* unmap the stencil buffer */
    -   screen->transfer_unmap(screen, ptDraw);
    -   screen->tex_transfer_destroy(ptDraw);
    +   pipe->transfer_unmap(pipe, ptDraw);
    +   pipe->tex_transfer_destroy(pipe, ptDraw);
     }
     
     
    @@ -941,7 +967,7 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
        struct pipe_sampler_view *sv;
        GLfloat *color;
        enum pipe_format srcFormat, texFormat;
    -   int ptw, pth;
    +   GLboolean invertTex = GL_FALSE;
     
        pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
     
    @@ -1017,8 +1043,8 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
           }
        }
     
    -   if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) {
    -      srcy = ctx->DrawBuffer->Height - srcy - height;
    +   if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
    +      srcy = ctx->ReadBuffer->Height - srcy - height;
     
           if (srcy < 0) {
              height -= -srcy;
    @@ -1027,32 +1053,12 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
     
           if (height < 0)
              return;
    +
    +      invertTex = !invertTex;
        }
     
    -   /* Need to use POT texture? */
    -   ptw = width;
    -   pth = height;
    -   if (!screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES)) {
    -      int l2pt, maxSize;
    -
    -      l2pt = util_logbase2(width);
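    -      if (1<<l2pt != width) {
    -         ptw = 1<<(l2pt+1);
    -      }
    -      l2pt = util_logbase2(height);
    -      if (1<<l2pt != height) {
    -         pth = 1<<(l2pt+1);
    -      }
    -
    -      /* Check against maximum texture size */
    -      maxSize = 1 << (pipe->screen->get_param(pipe->screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1);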
    -      assert(ptw <= maxSize);
    -      assert(pth <= maxSize);
    -   }
    -
    -   pt = st_texture_create(st, PIPE_TEXTURE_2D, texFormat, 0,
    -                          ptw, pth, 1,
    -                          PIPE_TEXTURE_USAGE_SAMPLER);
    +   /* alloc temporary texture */
    +   pt = alloc_texture(st, width, height, texFormat);
        if (!pt)
           return;
     
    @@ -1062,6 +1068,9 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
           return;
        }
     
    +   /* Make temporary texture which is a copy of the src region.
    +    * We'll draw a quad with this texture to draw the dest image.
    +    */
        if (srcFormat == texFormat) {
           /* copy source framebuffer surface into mipmap/texture */
           struct pipe_surface *psRead = screen->get_tex_surface(screen,
    @@ -1082,6 +1091,13 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
                                psRead,
                                srcx, srcy, width, height);
           }
    +
    +      if (0) {
    +         /* debug */
    +         debug_dump_surface(pipe, "copypixsrcsurf", psRead);
    +         debug_dump_surface(pipe, "copypixtemptex", psTex);
    +      }
    +
           pipe_surface_reference(&psRead, NULL); 
           pipe_surface_reference(&psTex, NULL);
        }
    @@ -1109,21 +1125,21 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
              /* alternate path using get/put_tile() */
              GLfloat *buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat));
     
    -         pipe_get_tile_rgba(ptRead, 0, 0, width, height, buf);
    -         pipe_put_tile_rgba(ptTex, 0, 0, width, height, buf);
    +         pipe_get_tile_rgba(pipe, ptRead, 0, 0, width, height, buf);
    +         pipe_put_tile_rgba(pipe, ptTex, 0, 0, width, height, buf);
     
              free(buf);
           }
           else {
              /* GL_DEPTH */
              GLuint *buf = (GLuint *) malloc(width * height * sizeof(GLuint));
    -         pipe_get_tile_z(ptRead, 0, 0, width, height, buf);
    -         pipe_put_tile_z(ptTex, 0, 0, width, height, buf);
    +         pipe_get_tile_z(pipe, ptRead, 0, 0, width, height, buf);
    +         pipe_put_tile_z(pipe, ptTex, 0, 0, width, height, buf);
              free(buf);
           }
     
    -      screen->tex_transfer_destroy(ptRead);
    -      screen->tex_transfer_destroy(ptTex);
    +      pipe->tex_transfer_destroy(pipe, ptRead);
    +      pipe->tex_transfer_destroy(pipe, ptTex);
        }
     
        /* draw textured quad */
    @@ -1132,7 +1148,7 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
                           sv, 
                           driver_vp, 
                           driver_fp,
    -                      color, GL_TRUE);
    +                      color, invertTex);
     
        pipe_texture_reference(&pt, NULL);
        pipe_sampler_view_reference(&sv, NULL);
    diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c
    index 00e9d1dccbd..abf0c8d6cb1 100644
    --- a/src/mesa/state_tracker/st_cb_fbo.c
    +++ b/src/mesa/state_tracker/st_cb_fbo.c
    @@ -379,6 +379,8 @@ st_render_texture(GLcontext *ctx,
                                                PIPE_BUFFER_USAGE_GPU_READ |
                                                PIPE_BUFFER_USAGE_GPU_WRITE);
     
    +   strb->format = pt->format;
    +
        strb->Base.Format = st_pipe_format_to_mesa_format(pt->format);
        strb->Base.DataType = st_format_datatype(pt->format);
     
    diff --git a/src/mesa/state_tracker/st_cb_flush.c b/src/mesa/state_tracker/st_cb_flush.c
    index 1329f807bc9..28a384ba49b 100644
    --- a/src/mesa/state_tracker/st_cb_flush.c
    +++ b/src/mesa/state_tracker/st_cb_flush.c
    @@ -79,7 +79,7 @@ display_front_buffer(struct st_context *st)
           /* Hook for copying "fake" frontbuffer if necessary:
            */
           st->pipe->screen->flush_frontbuffer( st->pipe->screen, front_surf,
    -                                           st->pipe->priv );
    +                                           st->winsys_drawable_handle );
     
           /*
             st->frontbuffer_status = FRONT_STATUS_UNDEFINED;
    diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c
    index 952d9ce9156..080a5f9bfb8 100644
    --- a/src/mesa/state_tracker/st_cb_readpixels.c
    +++ b/src/mesa/state_tracker/st_cb_readpixels.c
    @@ -63,7 +63,7 @@ st_read_stencil_pixels(GLcontext *ctx, GLint x, GLint y,
                            GLvoid *pixels)
     {
        struct gl_framebuffer *fb = ctx->ReadBuffer;
    -   struct pipe_screen *screen = ctx->st->pipe->screen;
    +   struct pipe_context *pipe = ctx->st->pipe;
        struct st_renderbuffer *strb = st_renderbuffer(fb->_StencilBuffer);
        struct pipe_transfer *pt;
        ubyte *stmap;
    @@ -81,7 +81,7 @@ st_read_stencil_pixels(GLcontext *ctx, GLint x, GLint y,
     				       width, height);
     
        /* map the stencil buffer */
    -   stmap = screen->transfer_map(screen, pt);
    +   stmap = pipe->transfer_map(pipe, pt);
     
        /* width should never be > MAX_WIDTH since we did clipping earlier */
        ASSERT(width <= MAX_WIDTH);
    @@ -161,8 +161,8 @@ st_read_stencil_pixels(GLcontext *ctx, GLint x, GLint y,
        }
     
        /* unmap the stencil buffer */
    -   screen->transfer_unmap(screen, pt);
    -   screen->tex_transfer_destroy(pt);
    +   pipe->transfer_unmap(pipe, pt);
    +   pipe->tex_transfer_destroy(pipe, pt);
     }
     
     
    @@ -234,13 +234,13 @@ st_fast_readpixels(GLcontext *ctx, struct st_renderbuffer *strb,
     
        {
           struct pipe_context *pipe = ctx->st->pipe;
    -      struct pipe_screen *screen = pipe->screen;
           struct pipe_transfer *trans;
           const GLubyte *map;
           GLubyte *dst;
           GLint row, col, dy, dstStride;
     
           if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
    +         /* convert GL Y to Gallium Y */
              y = strb->texture->height0 - y - height;
           }
     
    @@ -252,17 +252,22 @@ st_fast_readpixels(GLcontext *ctx, struct st_renderbuffer *strb,
              return GL_FALSE;
           }
     
    -      map = screen->transfer_map(screen, trans);
    +      map = pipe->transfer_map(pipe, trans);
           if (!map) {
    -         screen->tex_transfer_destroy(trans);
    +         pipe->tex_transfer_destroy(pipe, trans);
              return GL_FALSE;
           }
     
    +      /* We always write to the user/dest buffer from low addr to high addr
    +       * but the read order depends on renderbuffer orientation
    +       */
           if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
    +         /* read source rows from bottom to top */
              y = height - 1;
              dy = -1;
           }
           else {
    +         /* read source rows from top to bottom */
              y = 0;
              dy = 1;
           }
    @@ -311,8 +316,8 @@ st_fast_readpixels(GLcontext *ctx, struct st_renderbuffer *strb,
              ; /* nothing */
           }
     
    -      screen->transfer_unmap(screen, trans);
    -      screen->tex_transfer_destroy(trans);
    +      pipe->transfer_unmap(pipe, trans);
    +      pipe->tex_transfer_destroy(pipe, trans);
        }
     
        return GL_TRUE;
    @@ -331,7 +336,6 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
                   GLvoid *dest)
     {
        struct pipe_context *pipe = ctx->st->pipe;
    -   struct pipe_screen *screen = pipe->screen;
        GLfloat temp[MAX_WIDTH][4];
        const GLbitfield transferOps = ctx->_ImageTransferState;
        GLsizei i, j;
    @@ -396,6 +400,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
        }
     
        if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
    +      /* convert GL Y to Gallium Y */
           y = strb->Base.Height - y - height;
        }
     
    @@ -436,7 +441,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
                    GLuint ztemp[MAX_WIDTH];
                    GLfloat zfloat[MAX_WIDTH];
                    const double scale = 1.0 / ((1 << 24) - 1);
    -               pipe_get_tile_raw(trans, 0, y, width, 1, ztemp, 0);
    +               pipe_get_tile_raw(pipe, trans, 0, y, width, 1, ztemp, 0);
                    y += yStep;
                    for (j = 0; j < width; j++) {
                       zfloat[j] = (float) (scale * (ztemp[j] & 0xffffff));
    @@ -451,7 +456,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
                 assert(format == GL_DEPTH_STENCIL_EXT);
                 for (i = 0; i < height; i++) {
                    GLuint *zshort = (GLuint *)dst;
    -               pipe_get_tile_raw(trans, 0, y, width, 1, dst, 0);
    +               pipe_get_tile_raw(pipe, trans, 0, y, width, 1, dst, 0);
                    y += yStep;
                    /* Reverse into 24/8 */
                    for (j = 0; j < width; j++) {
    @@ -468,7 +473,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
                    GLuint ztemp[MAX_WIDTH];
                    GLfloat zfloat[MAX_WIDTH];
                    const double scale = 1.0 / ((1 << 24) - 1);
    -               pipe_get_tile_raw(trans, 0, y, width, 1, ztemp, 0);
    +               pipe_get_tile_raw(pipe, trans, 0, y, width, 1, ztemp, 0);
                    y += yStep;
                    for (j = 0; j < width; j++) {
                       zfloat[j] = (float) (scale * ((ztemp[j] >> 8) & 0xffffff));
    @@ -482,7 +487,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
                 /* XXX: unreachable code -- should be before st_read_stencil_pixels */
                 assert(format == GL_DEPTH_STENCIL_EXT);
                 for (i = 0; i < height; i++) {
    -               pipe_get_tile_raw(trans, 0, y, width, 1, dst, 0);
    +               pipe_get_tile_raw(pipe, trans, 0, y, width, 1, dst, 0);
                    y += yStep;
                    dst += dstStride;
                 }
    @@ -493,7 +498,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
                 GLushort ztemp[MAX_WIDTH];
                 GLfloat zfloat[MAX_WIDTH];
                 const double scale = 1.0 / 0xffff;
    -            pipe_get_tile_raw(trans, 0, y, width, 1, ztemp, 0);
    +            pipe_get_tile_raw(pipe, trans, 0, y, width, 1, ztemp, 0);
                 y += yStep;
                 for (j = 0; j < width; j++) {
                    zfloat[j] = (float) (scale * ztemp[j]);
    @@ -508,7 +513,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
                 GLuint ztemp[MAX_WIDTH];
                 GLfloat zfloat[MAX_WIDTH];
                 const double scale = 1.0 / 0xffffffff;
    -            pipe_get_tile_raw(trans, 0, y, width, 1, ztemp, 0);
    +            pipe_get_tile_raw(pipe, trans, 0, y, width, 1, ztemp, 0);
                 y += yStep;
                 for (j = 0; j < width; j++) {
                    zfloat[j] = (float) (scale * ztemp[j]);
    @@ -522,7 +527,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
              /* RGBA format */
              /* Do a row at a time to flip image data vertically */
              for (i = 0; i < height; i++) {
    -            pipe_get_tile_rgba(trans, 0, y, width, 1, df);
    +            pipe_get_tile_rgba(pipe, trans, 0, y, width, 1, df);
                 y += yStep;
                 df += dfStride;
                 if (!dfStride) {
    @@ -534,7 +539,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
           }
        }
     
    -   screen->tex_transfer_destroy(trans);
    +   pipe->tex_transfer_destroy(pipe, trans);
     
        _mesa_unmap_pbo_dest(ctx, &clippedPacking);
     }
    diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
    index 76846560f9f..04c0ef8c84c 100644
    --- a/src/mesa/state_tracker/st_cb_texture.c
    +++ b/src/mesa/state_tracker/st_cb_texture.c
    @@ -375,7 +375,8 @@ compress_with_blit(GLcontext * ctx,
     {
        const GLuint dstImageOffsets[1] = {0};
        struct st_texture_image *stImage = st_texture_image(texImage);
    -   struct pipe_screen *screen = ctx->st->pipe->screen;
    +   struct pipe_context *pipe = ctx->st->pipe;
    +   struct pipe_screen *screen = pipe->screen;
        gl_format mesa_format;
        struct pipe_texture templ;
        struct pipe_texture *src_tex;
    @@ -425,7 +426,7 @@ compress_with_blit(GLcontext * ctx,
     					     0, 0, 0, /* face, level are zero */
     					     PIPE_TRANSFER_WRITE,
     					     0, 0, width, height); /* x, y, w, h */
    -   map = screen->transfer_map(screen, tex_xfer);
    +   map = pipe->transfer_map(pipe, tex_xfer);
     
        _mesa_texstore(ctx, 2, GL_RGBA, mesa_format,
                       map,              /* dest ptr */
    @@ -437,8 +438,8 @@ compress_with_blit(GLcontext * ctx,
                       pixels,           /* source data */
                       unpack);          /* source data packing */
     
    -   screen->transfer_unmap(screen, tex_xfer);
    -   screen->tex_transfer_destroy(tex_xfer);
    +   pipe->transfer_unmap(pipe, tex_xfer);
    +   pipe->tex_transfer_destroy(pipe, tex_xfer);
     
        /* copy / compress image */
        util_blit_pixels_tex(ctx->st->blit,
    @@ -813,7 +814,8 @@ decompress_with_blit(GLcontext * ctx, GLenum target, GLint level,
                          struct gl_texture_object *texObj,
                          struct gl_texture_image *texImage)
     {
    -   struct pipe_screen *screen = ctx->st->pipe->screen;
    +   struct pipe_context *pipe = ctx->st->pipe;
    +   struct pipe_screen *screen = pipe->screen;
        struct st_texture_image *stImage = st_texture_image(texImage);
        const GLuint width = texImage->Width;
        const GLuint height = texImage->Height;
    @@ -852,7 +854,7 @@ decompress_with_blit(GLcontext * ctx, GLenum target, GLint level,
        if (st_equal_formats(stImage->pt->format, format, type)) {
           /* memcpy */
           const uint bytesPerRow = width * util_format_get_blocksize(stImage->pt->format);
    -      ubyte *map = screen->transfer_map(screen, tex_xfer);
    +      ubyte *map = pipe->transfer_map(pipe, tex_xfer);
           GLuint row;
           for (row = 0; row < height; row++) {
              GLvoid *dest = _mesa_image_address2d(&ctx->Pack, pixels, width,
    @@ -860,7 +862,7 @@ decompress_with_blit(GLcontext * ctx, GLenum target, GLint level,
              memcpy(dest, map, bytesPerRow);
              map += tex_xfer->stride;
           }
    -      screen->transfer_unmap(screen, tex_xfer);
    +      pipe->transfer_unmap(pipe, tex_xfer);
        }
        else {
           /* format translation via floats */
    @@ -875,7 +877,7 @@ decompress_with_blit(GLcontext * ctx, GLenum target, GLint level,
                 debug_printf("%s: fallback format translation\n", __FUNCTION__);
     
              /* get float[4] rgba row from surface */
    -         pipe_get_tile_rgba(tex_xfer, 0, row, width, 1, rgba);
    +         pipe_get_tile_rgba(pipe, tex_xfer, 0, row, width, 1, rgba);
     
              _mesa_pack_rgba_span_float(ctx, width, (GLfloat (*)[4]) rgba, format,
                                         type, dest, &ctx->Pack, transferOps);
    @@ -1260,7 +1262,6 @@ fallback_copy_texsubimage(GLcontext *ctx, GLenum target, GLint level,
                               GLsizei width, GLsizei height)
     {
        struct pipe_context *pipe = ctx->st->pipe;
    -   struct pipe_screen *screen = pipe->screen;
        struct pipe_transfer *src_trans;
        GLvoid *texDest;
        enum pipe_transfer_usage transfer_usage;
    @@ -1313,11 +1314,11 @@ fallback_copy_texsubimage(GLcontext *ctx, GLenum target, GLint level,
           /* To avoid a large temp memory allocation, do copy row by row */
           for (row = 0; row < height; row++, srcY += yStep) {
              uint data[MAX_WIDTH];
    -         pipe_get_tile_z(src_trans, 0, srcY, width, 1, data);
    +         pipe_get_tile_z(pipe, src_trans, 0, srcY, width, 1, data);
              if (scaleOrBias) {
                 _mesa_scale_and_bias_depth_uint(ctx, width, data);
              }
    -         pipe_put_tile_z(stImage->transfer, 0, row, width, 1, data);
    +         pipe_put_tile_z(pipe, stImage->transfer, 0, row, width, 1, data);
           }
        }
        else {
    @@ -1339,7 +1340,7 @@ fallback_copy_texsubimage(GLcontext *ctx, GLenum target, GLint level,
              /* XXX this usually involves a lot of int/float conversion.
               * try to avoid that someday.
               */
    -         pipe_get_tile_rgba(src_trans, 0, 0, width, height, tempSrc);
    +         pipe_get_tile_rgba(pipe, src_trans, 0, 0, width, height, tempSrc);
     
              /* Store into texture memory.
               * Note that this does some special things such as pixel transfer
    @@ -1367,7 +1368,7 @@ fallback_copy_texsubimage(GLcontext *ctx, GLenum target, GLint level,
        }
     
        st_texture_image_unmap(ctx->st, stImage);
    -   screen->tex_transfer_destroy(src_trans);
    +   pipe->tex_transfer_destroy(pipe, src_trans);
     }
     
     
    diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
    index e978415c200..ce1a2ea33cd 100644
    --- a/src/mesa/state_tracker/st_context.c
    +++ b/src/mesa/state_tracker/st_context.c
    @@ -287,7 +287,7 @@ st_make_current(struct st_context *st,
           }
     
           _mesa_check_init_viewport(st->ctx, draw->InitWidth, draw->InitHeight);
    -      st->pipe->priv = winsys_drawable_handle;
    +      st->winsys_drawable_handle = winsys_drawable_handle;
     
           return GL_TRUE;
        }
    diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
    index ae73817c0e6..4f3a67f41b2 100644
    --- a/src/mesa/state_tracker/st_context.h
    +++ b/src/mesa/state_tracker/st_context.h
    @@ -186,6 +186,7 @@ struct st_context
        struct cso_context *cso_context;
     
        int force_msaa;
    +   void *winsys_drawable_handle;
     };
     
     
    diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
    index 8a6e1ed4662..7f45e3f5484 100644
    --- a/src/mesa/state_tracker/st_draw.c
    +++ b/src/mesa/state_tracker/st_draw.c
    @@ -184,7 +184,7 @@ st_pipe_vertex_format(GLenum type, GLuint size, GLenum format,
           /* this is an odd-ball case */
           assert(type == GL_UNSIGNED_BYTE);
           assert(normalized);
    -      return PIPE_FORMAT_A8R8G8B8_UNORM;
    +      return PIPE_FORMAT_B8G8R8A8_UNORM;
        }
     
        if (normalized) {
    @@ -273,7 +273,8 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
        }
     
        *userSpace = (num_client_arrays == vpv->num_inputs);
    -   /* printf("user space: %d (%d %d)\n", (int) *userSpace,num_client_arrays,vp->num_inputs); */
    +   /* debug_printf("user space: %s (%d arrays, %d inputs)\n",
    +      (int)*userSpace ? "Yes" : "No", num_client_arrays, vp->num_inputs); */
     
        return GL_TRUE;
     }
    @@ -293,6 +294,8 @@ get_arrays_bounds(const struct st_vertex_program *vp,
        const GLubyte *high_addr = NULL;
        GLuint attr;
     
    +   /* debug_printf("get_arrays_bounds: Handling %u attrs\n", vpv->num_inputs); */
    +
        for (attr = 0; attr < vpv->num_inputs; attr++) {
           const GLuint mesaAttr = vp->index_to_input[attr];
           const GLint stride = arrays[mesaAttr]->StrideB;
    @@ -301,6 +304,9 @@ get_arrays_bounds(const struct st_vertex_program *vp,
                                _mesa_sizeof_type(arrays[mesaAttr]->Type));
           const GLubyte *end = start + (max_index * stride) + sz;
     
    +      /* debug_printf("attr %u: stride %d size %u start %p end %p\n",
    +         attr, stride, sz, start, end); */
    +
           if (attr == 0) {
              low_addr = start;
              high_addr = end;
    @@ -348,7 +354,8 @@ setup_interleaved_attribs(GLcontext *ctx,
              const GLubyte *low, *high;
     
              get_arrays_bounds(vp, vpv, arrays, max_index, &low, &high);
    -         /*printf("buffer range: %p %p  %d\n", low, high, high-low);*/
    +         /* debug_printf("buffer range: %p %p range %d max index %u\n",
    +            low, high, high - low, max_index); */
     
              offset0 = low;
              if (userSpace) {
    diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
    index e03dd30f0f5..290ee36b0fe 100644
    --- a/src/mesa/state_tracker/st_extensions.c
    +++ b/src/mesa/state_tracker/st_extensions.c
    @@ -172,6 +172,7 @@ void st_init_extensions(struct st_context *st)
        ctx->Extensions.ARB_vertex_array_object = GL_TRUE;
        ctx->Extensions.ARB_vertex_buffer_object = GL_TRUE;
        ctx->Extensions.ARB_vertex_program = GL_TRUE;
    +   ctx->Extensions.ARB_window_pos = GL_TRUE;
     
        ctx->Extensions.EXT_blend_color = GL_TRUE;
        ctx->Extensions.EXT_blend_func_separate = GL_TRUE;
    @@ -196,9 +197,17 @@ void st_init_extensions(struct st_context *st)
     
        ctx->Extensions.APPLE_vertex_array_object = GL_TRUE;
     
    +   ctx->Extensions.MESA_pack_invert = GL_TRUE;
    +
        ctx->Extensions.NV_blend_square = GL_TRUE;
        ctx->Extensions.NV_texgen_reflection = GL_TRUE;
        ctx->Extensions.NV_texture_env_combine4 = GL_TRUE;
    +   ctx->Extensions.NV_texture_rectangle = GL_TRUE;
    +#if 0
    +   /* possibly could support the following two */
    +   ctx->Extensions.NV_vertex_program = GL_TRUE;
    +   ctx->Extensions.NV_vertex_program1_1 = GL_TRUE;
    +#endif
     
     #if FEATURE_OES_draw_texture
        ctx->Extensions.OES_draw_texture = GL_TRUE;
    @@ -236,7 +245,6 @@ void st_init_extensions(struct st_context *st)
     
        if (screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES)) {
           ctx->Extensions.ARB_texture_non_power_of_two = GL_TRUE;
    -      ctx->Extensions.NV_texture_rectangle = GL_TRUE;
        }
     
        if (screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS) > 1) {
    diff --git a/src/mesa/state_tracker/st_framebuffer.c b/src/mesa/state_tracker/st_framebuffer.c
    index 1d35e8d6574..0a91183f89d 100644
    --- a/src/mesa/state_tracker/st_framebuffer.c
    +++ b/src/mesa/state_tracker/st_framebuffer.c
    @@ -167,9 +167,7 @@ st_set_framebuffer_surface(struct st_framebuffer *stfb,
                                uint surfIndex, struct pipe_surface *surf)
     {
        GET_CURRENT_CONTEXT(ctx);
    -   static const GLuint invalid_size = 9999999;
        struct st_renderbuffer *strb;
    -   GLuint width, height, i;
     
        /* sanity checks */
        assert(ST_SURFACE_FRONT_LEFT == BUFFER_FRONT_LEFT);
    @@ -183,18 +181,17 @@ st_set_framebuffer_surface(struct st_framebuffer *stfb,
        strb = st_renderbuffer(stfb->Base.Attachment[surfIndex].Renderbuffer);
     
        if (!strb) {
    -      if (surfIndex == ST_SURFACE_FRONT_LEFT) {
    -         /* Delayed creation when the window system supplies a fake front buffer */
    -         struct st_renderbuffer *strb_back
    -            = st_renderbuffer(stfb->Base.Attachment[ST_SURFACE_BACK_LEFT].Renderbuffer);
    -         struct gl_renderbuffer *rb
    -            = st_new_renderbuffer_fb(surf->format, strb_back->Base.NumSamples, FALSE);
    -         _mesa_add_renderbuffer(&stfb->Base, BUFFER_FRONT_LEFT, rb);
    -         strb = st_renderbuffer(rb);
    -      } else {
    -         /* fail */
    +      /* create new renderbuffer for this surface now */
    +      const GLuint numSamples = stfb->Base.Visual.samples;
    +      struct gl_renderbuffer *rb =
    +         st_new_renderbuffer_fb(surf->format, numSamples, FALSE);
    +      if (!rb) {
    +         /* out of memory */
    +         _mesa_warning(ctx, "Out of memory allocating renderbuffer");
              return;
           }
    +      _mesa_add_renderbuffer(&stfb->Base, surfIndex, rb);
    +      strb = st_renderbuffer(rb);
        }
     
        /* replace the renderbuffer's surface/texture pointers */
    @@ -206,39 +203,16 @@ st_set_framebuffer_surface(struct st_framebuffer *stfb,
            * But when we do, we need to start setting this dirty bit
            * to ensure the renderbuffer attachements are up-to-date
            * via update_framebuffer.
    +       * Core Mesa's state validation will update the parent framebuffer's
    +       * size info, etc.
            */
           ctx->st->dirty.st |= ST_NEW_FRAMEBUFFER;
    +      ctx->NewState |= _NEW_BUFFERS;
        }
     
        /* update renderbuffer's width/height */
        strb->Base.Width = surf->width;
        strb->Base.Height = surf->height;
    -
    -   /* Try to update the framebuffer's width/height from the renderbuffer
    -    * sizes.  Before we start drawing, all the rbs _should_ be the same size.
    -    */
    -   width = height = invalid_size;
    -   for (i = 0; i < BUFFER_COUNT; i++) {
    -      if (stfb->Base.Attachment[i].Renderbuffer) {
    -         if (width == invalid_size) {
    -            width = stfb->Base.Attachment[i].Renderbuffer->Width;
    -            height = stfb->Base.Attachment[i].Renderbuffer->Height;
    -         }
    -         else if (width != stfb->Base.Attachment[i].Renderbuffer->Width ||
    -                  height != stfb->Base.Attachment[i].Renderbuffer->Height) {
    -            /* inconsistant renderbuffer sizes, bail out */
    -            return;
    -         }
    -      }
    -   }
    -
    -   if (width != invalid_size) {
    -      /* OK, the renderbuffers are of a consistant size, so update the
    -       * parent framebuffer's size.
    -       */
    -      stfb->Base.Width = width;
    -      stfb->Base.Height = height;
    -   }
     }
     
     
    diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c
    index f67d7b4cb5c..b2521433c87 100644
    --- a/src/mesa/state_tracker/st_gen_mipmap.c
    +++ b/src/mesa/state_tracker/st_gen_mipmap.c
    @@ -106,7 +106,6 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target,
                              struct gl_texture_object *texObj)
     {
        struct pipe_context *pipe = ctx->st->pipe;
    -   struct pipe_screen *screen = pipe->screen;
        struct pipe_texture *pt = st_get_texobj_texture(texObj);
        const uint baseLevel = texObj->BaseLevel;
        const uint lastLevel = pt->last_level;
    @@ -142,8 +141,8 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target,
     						u_minify(pt->width0, dstLevel),
     						u_minify(pt->height0, dstLevel));
     
    -      srcData = (ubyte *) screen->transfer_map(screen, srcTrans);
    -      dstData = (ubyte *) screen->transfer_map(screen, dstTrans);
    +      srcData = (ubyte *) pipe->transfer_map(pipe, srcTrans);
    +      dstData = (ubyte *) pipe->transfer_map(pipe, dstTrans);
     
           srcStride = srcTrans->stride / util_format_get_blocksize(srcTrans->texture->format);
           dstStride = dstTrans->stride / util_format_get_blocksize(dstTrans->texture->format);
    @@ -161,11 +160,11 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target,
                                       dstData,
                                       dstStride); /* stride in texels */
     
    -      screen->transfer_unmap(screen, srcTrans);
    -      screen->transfer_unmap(screen, dstTrans);
    +      pipe->transfer_unmap(pipe, srcTrans);
    +      pipe->transfer_unmap(pipe, dstTrans);
     
    -      screen->tex_transfer_destroy(srcTrans);
    -      screen->tex_transfer_destroy(dstTrans);
    +      pipe->tex_transfer_destroy(pipe, srcTrans);
    +      pipe->tex_transfer_destroy(pipe, dstTrans);
        }
     }
     
    diff --git a/src/mesa/state_tracker/st_inlines.h b/src/mesa/state_tracker/st_inlines.h
    index e105870bc75..7fcde7b1a96 100644
    --- a/src/mesa/state_tracker/st_inlines.h
    +++ b/src/mesa/state_tracker/st_inlines.h
    @@ -53,11 +53,11 @@ st_cond_flush_get_tex_transfer(struct st_context *st,
     			       unsigned int x, unsigned int y,
     			       unsigned int w, unsigned int h)
     {
    -   struct pipe_screen *screen = st->pipe->screen;
    +   struct pipe_context *context = st->pipe;
     
        st_teximage_flush_before_map(st, pt, face, level, usage);
    -   return screen->get_tex_transfer(screen, pt, face, level, zslice, usage,
    -				   x, y, w, h);
    +   return context->get_tex_transfer(context, pt, face, level, zslice, usage,
    +				    x, y, w, h);
     }
     
     static INLINE struct pipe_transfer *
    @@ -70,9 +70,9 @@ st_no_flush_get_tex_transfer(struct st_context *st,
     			     unsigned int x, unsigned int y,
     			     unsigned int w, unsigned int h)
     {
    -   struct pipe_screen *screen = st->pipe->screen;
    +   struct pipe_context *context = st->pipe;
     
    -   return screen->get_tex_transfer(screen, pt, face, level,
    +   return context->get_tex_transfer(context, pt, face, level,
     				   zslice, usage, x, y, w, h);
     }
     
    diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c
    index 5a45c4358a9..10a38befb41 100644
    --- a/src/mesa/state_tracker/st_texture.c
    +++ b/src/mesa/state_tracker/st_texture.c
    @@ -192,7 +192,6 @@ st_texture_image_map(struct st_context *st, struct st_texture_image *stImage,
                          GLuint x, GLuint y, GLuint w, GLuint h)
     {
        struct pipe_context *pipe = st->pipe;
    -   struct pipe_screen *screen = pipe->screen;
        struct pipe_texture *pt = stImage->pt;
     
        DBG("%s \n", __FUNCTION__);
    @@ -202,7 +201,7 @@ st_texture_image_map(struct st_context *st, struct st_texture_image *stImage,
     						    usage, x, y, w, h);
     
        if (stImage->transfer)
    -      return screen->transfer_map(screen, stImage->transfer);
    +      return pipe->transfer_map(pipe, stImage->transfer);
        else
           return NULL;
     }
    @@ -212,13 +211,13 @@ void
     st_texture_image_unmap(struct st_context *st,
                            struct st_texture_image *stImage)
     {
    -   struct pipe_screen *screen = st->pipe->screen;
    +   struct pipe_context *pipe = st->pipe;
     
        DBG("%s\n", __FUNCTION__);
     
    -   screen->transfer_unmap(screen, stImage->transfer);
    +   pipe->transfer_unmap(pipe, stImage->transfer);
     
    -   screen->tex_transfer_destroy(stImage->transfer);
    +   pipe->tex_transfer_destroy(pipe, stImage->transfer);
     }
     
     
    @@ -238,8 +237,7 @@ st_surface_data(struct pipe_context *pipe,
     		const void *src, unsigned src_stride,
     		unsigned srcx, unsigned srcy, unsigned width, unsigned height)
     {
    -   struct pipe_screen *screen = pipe->screen;
    -   void *map = screen->transfer_map(screen, dst);
    +   void *map = pipe->transfer_map(pipe, dst);
     
        assert(dst->texture);
        util_copy_rect(map,
    @@ -250,7 +248,7 @@ st_surface_data(struct pipe_context *pipe,
                       src, src_stride, 
                       srcx, srcy);
     
    -   screen->transfer_unmap(screen, dst);
    +   pipe->transfer_unmap(pipe, dst);
     }
     
     
    @@ -265,7 +263,6 @@ st_texture_image_data(struct st_context *st,
                           GLuint src_row_stride, GLuint src_image_stride)
     {
        struct pipe_context *pipe = st->pipe;
    -   struct pipe_screen *screen = pipe->screen;
        GLuint depth = u_minify(dst->depth0, level);
        GLuint i;
        const GLubyte *srcUB = src;
    @@ -287,7 +284,7 @@ st_texture_image_data(struct st_context *st,
     		      u_minify(dst->width0, level),
                           u_minify(dst->height0, level));      /* width, height */
     
    -      screen->tex_transfer_destroy(dst_transfer);
    +      pipe->tex_transfer_destroy(pipe, dst_transfer);
     
           srcUB += src_image_stride;
        }
    diff --git a/src/mesa/swrast/s_depth.c b/src/mesa/swrast/s_depth.c
    index 3e36cf9a7e5..ed637cac124 100644
    --- a/src/mesa/swrast/s_depth.c
    +++ b/src/mesa/swrast/s_depth.c
    @@ -526,7 +526,7 @@ _swrast_depth_clamp_span( GLcontext *ctx, SWspan *span )
     
        /* Convert floating point values in [0,1] to device Z coordinates in
         * [0, DepthMax].
    -    * ex: If the the Z buffer has 24 bits, DepthMax = 0xffffff.
    +    * ex: If the Z buffer has 24 bits, DepthMax = 0xffffff.
         * 
         * XXX this all falls apart if we have 31 or more bits of Z because
         * the triangle rasterization code produces unsigned Z values.  Negative
    diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c
    index 29f070686fe..687c8eb0bf8 100644
    --- a/src/mesa/swrast/s_span.c
    +++ b/src/mesa/swrast/s_span.c
    @@ -60,6 +60,7 @@
     void
     _swrast_span_default_attribs(GLcontext *ctx, SWspan *span)
     {
    +   GLchan r, g, b, a;
        /* Z*/
        {
           const GLfloat depthMax = ctx->DrawBuffer->_DepthMaxF;
    @@ -80,7 +81,6 @@ _swrast_span_default_attribs(GLcontext *ctx, SWspan *span)
        span->attrStepY[FRAG_ATTRIB_WPOS][3] = 0.0;
     
        /* primary color, or color index */
    -   GLchan r, g, b, a;
        UNCLAMPED_FLOAT_TO_CHAN(r, ctx->Current.RasterColor[0]);
        UNCLAMPED_FLOAT_TO_CHAN(g, ctx->Current.RasterColor[1]);
        UNCLAMPED_FLOAT_TO_CHAN(b, ctx->Current.RasterColor[2]);
    diff --git a/src/mesa/vbo/vbo_save_loopback.c b/src/mesa/vbo/vbo_save_loopback.c
    index f253c854d2b..3f581ea02da 100644
    --- a/src/mesa/vbo/vbo_save_loopback.c
    +++ b/src/mesa/vbo/vbo_save_loopback.c
    @@ -78,7 +78,7 @@ struct loopback_attr {
     };
     
     /* Don't emit ends and begins on wrapped primitives.  Don't replay
    - * wrapped vertices.  If we get here, it's probably because the the
    + * wrapped vertices.  If we get here, it's probably because the
      * precalculated wrapping is wrong.
      */
     static void loopback_prim( GLcontext *ctx,
    diff --git a/windows/VC8/mesa/.gitignore b/windows/VC8/mesa/.gitignore
    new file mode 100644
    index 00000000000..d89aab109b1
    --- /dev/null
    +++ b/windows/VC8/mesa/.gitignore
    @@ -0,0 +1,30 @@
    +debug/
    +debug static crt/
    +release/
    +release static crt/
    +gdi/Debug/
    +gdi/Debug Static CRT/
    +gdi/Release/
    +gdi/Release Static CRT/
    +gdi/*.user
    +glsl_apps_compile/Debug
    +glsl_apps_compile/Release
    +glsl_apps_compile/*.user
    +glu/Debug/
    +glu/Debug Static CRT/
    +glu/Release/
    +glu/Release Static CRT/
    +glu/*.user
    +mesa.ncb
    +mesa.suo
    +mesa/Debug/
    +mesa/Debug Static CRT/
    +mesa/Release/
    +mesa/Release Static CRT/
    +mesa/*.user
    +osmesa/Debug/
    +osmesa/Debug Static CRT/
    +osmesa/Release/
    +osmesa/Release Static CRT/
    +osmesa/*.user
    +
    diff --git a/windows/VC8/mesa/mesa/mesa.vcproj b/windows/VC8/mesa/mesa/mesa.vcproj
    index 87957469168..5a5b93a7799 100644
    --- a/windows/VC8/mesa/mesa/mesa.vcproj
    +++ b/windows/VC8/mesa/mesa/mesa.vcproj
    @@ -494,6 +494,10 @@
     				RelativePath="..\..\..\..\src\mesa\glapi\glapi_dispatch.c"
     				>
     			
    +			
    +			
     			
    @@ -538,7 +542,7 @@
     					>
     					
     				
     				
     					
     				
     				
     					
     				
     				
     					
     				
     			
    @@ -1844,7 +1848,7 @@
     					
    @@ -1855,7 +1859,7 @@
     					
    @@ -1866,7 +1870,7 @@
     					
    @@ -1877,7 +1881,7 @@
     					
    @@ -1896,7 +1900,7 @@
     					
    @@ -1907,7 +1911,7 @@
     					
    @@ -1918,7 +1922,7 @@
     					
    @@ -1929,7 +1933,7 @@
     					
    @@ -1944,7 +1948,7 @@
     					
    @@ -1955,7 +1959,7 @@
     					
    @@ -1966,7 +1970,7 @@
     					
    @@ -1977,7 +1981,7 @@
     					
    @@ -1996,7 +2000,7 @@
     					
    @@ -2007,7 +2011,7 @@
     					
    @@ -2018,7 +2022,7 @@
     					
    @@ -2029,7 +2033,7 @@
     					
    @@ -2064,7 +2068,7 @@
     					
    @@ -2075,7 +2079,7 @@
     					
    @@ -2086,7 +2090,7 @@
     					
    @@ -2097,7 +2101,7 @@
     					
    @@ -2116,7 +2120,7 @@
     					
    @@ -2127,7 +2131,7 @@
     					
    @@ -2138,7 +2142,7 @@
     					
    @@ -2149,7 +2153,7 @@
     					
    @@ -2208,7 +2212,7 @@
     					
    @@ -2219,7 +2223,7 @@
     					
    @@ -2230,7 +2234,7 @@
     					
    @@ -2241,7 +2245,7 @@
     					
    diff --git a/windows/VC8/progs/.gitignore b/windows/VC8/progs/.gitignore
    new file mode 100644
    index 00000000000..b1d3677c1b6
    --- /dev/null
    +++ b/windows/VC8/progs/.gitignore
    @@ -0,0 +1,17 @@
    +debug/
    +debug static crt/
    +release/
    +release static crt/
    +demos/Debug/
    +demos/Debug Static CRT/
    +demos/Release/
    +demos/Release Static CRT/
    +demos/*.user
    +glut/Debug/
    +glut/Debug Static CRT/
    +glut/Release/
    +glut/Release Static CRT/
    +glut/*.user
    +progs.ncb
    +progs.suo
    +